Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp                 | 20866
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp                    |  2474
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp        |   546
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp                |  1165
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h                  |   142
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp                 |  4646
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp          |  2330
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp        |  4313
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp               |  1092
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h                 |   962
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp        |   561
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp           |  1444
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp         |  5122
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp       |   625
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h                |   166
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp             |   804
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp           |  3188
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp          |  1033
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h            |   193
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp             |   276
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp                |  9631
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp |   298
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp         | 10559
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h           |   892
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp          |   963
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp            |  3665
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp         |   304
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp      |    17
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp          |  1067
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h            |   128
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp              |  7368
31 files changed, 86840 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 0000000000000..e8950b58d42df
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,20866 @@
+//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dagcombine"
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
+STATISTIC(SlicedLoads, "Number of loads sliced");
+STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
+
+static cl::opt<bool>
+CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Enable DAG combiner's use of IR alias analysis"));
+
+static cl::opt<bool>
+UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
+ cl::desc("Enable DAG combiner's use of TBAA"));
+
+#ifndef NDEBUG
+static cl::opt<std::string>
+CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
+ cl::desc("Only use DAG-combiner alias analysis in this"
+ " function"));
+#endif
+
+/// Hidden option to stress test load slicing, i.e., when this option
+/// is enabled, load slicing bypasses most of its profitability guards.
+static cl::opt<bool>
+StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
+ cl::desc("Bypass the profitability model of load slicing"),
+ cl::init(false));
+
+static cl::opt<bool>
+ MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner may split indexing from loads"));
+
+static cl::opt<bool>
+ EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner enable merging multiple stores "
+ "into a wider store"));
+
+static cl::opt<unsigned> TokenFactorInlineLimit(
+ "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
+ cl::desc("Limit the number of operands to inline for Token Factors"));
+
+static cl::opt<unsigned> StoreMergeDependenceLimit(
+ "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
+ cl::desc("Limit the number of times for the same StoreNode and RootNode "
+ "to bail out in store merging dependence check"));
+
+namespace {
+
+ class DAGCombiner {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ CombineLevel Level;
+ CodeGenOpt::Level OptLevel;
+ bool LegalOperations = false;
+ bool LegalTypes = false;
+ bool ForCodeSize;
+
+ /// Worklist of all of the nodes that need to be simplified.
+ ///
+ /// This must behave as a stack -- new nodes to process are pushed onto the
+ /// back and when processing we pop off of the back.
+ ///
+ /// The worklist will not contain duplicates but may contain null entries
+ /// due to nodes being deleted from the underlying DAG.
+ SmallVector<SDNode *, 64> Worklist;
+
+ /// Mapping from an SDNode to its position on the worklist.
+ ///
+ /// This is used to find and remove nodes from the worklist (by nulling
+ /// them) when they are deleted from the underlying DAG. It relies on
+ /// stable indices of nodes within the worklist.
+ DenseMap<SDNode *, unsigned> WorklistMap;
+    /// This records all nodes attempted to be added to the worklist since we
+    /// last considered a new worklist entry. Because we do not add duplicate
+    /// nodes to the worklist, this is different from the tail of the worklist.
+ SmallSetVector<SDNode *, 32> PruningList;
+
+ /// Set of nodes which have been combined (at least once).
+ ///
+ /// This is used to allow us to reliably add any operands of a DAG node
+ /// which have not yet been combined to the worklist.
+ SmallPtrSet<SDNode *, 32> CombinedNodes;
+
+ /// Map from candidate StoreNode to the pair of RootNode and count.
+ /// The count is used to track how many times we have seen the StoreNode
+    /// with the same RootNode bail out in the dependence check. Once the same
+    /// pair has bailed out more than the limit, we stop considering the
+    /// StoreNode with that RootNode as a store merging candidate.
+ DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis *AA;
+
+ /// When an instruction is simplified, add all users of the instruction to
+ /// the work lists because they might get more simplified now.
+ void AddUsersToWorklist(SDNode *N) {
+ for (SDNode *Node : N->uses())
+ AddToWorklist(Node);
+ }
+
+ // Prune potentially dangling nodes. This is called after
+ // any visit to a node, but should also be called during a visit after any
+ // failed combine which may have created a DAG node.
+ void clearAddedDanglingWorklistEntries() {
+ // Check any nodes added to the worklist to see if they are prunable.
+ while (!PruningList.empty()) {
+ auto *N = PruningList.pop_back_val();
+ if (N->use_empty())
+ recursivelyDeleteUnusedNodes(N);
+ }
+ }
+
+ SDNode *getNextWorklistEntry() {
+ // Before we do any work, remove nodes that are not in use.
+ clearAddedDanglingWorklistEntries();
+ SDNode *N = nullptr;
+ // The Worklist holds the SDNodes in order, but it may contain null
+ // entries.
+ while (!N && !Worklist.empty()) {
+ N = Worklist.pop_back_val();
+ }
+
+ if (N) {
+ bool GoodWorklistEntry = WorklistMap.erase(N);
+ (void)GoodWorklistEntry;
+ assert(GoodWorklistEntry &&
+ "Found a worklist entry without a corresponding map entry!");
+ }
+ return N;
+ }
+
+ /// Call the node-specific routine that folds each particular type of node.
+ SDValue visit(SDNode *N);
+
+ public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
+ OptLevel(OL), AA(AA) {
+ ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+
+ MaximumLegalStoreInBits = 0;
+ for (MVT VT : MVT::all_valuetypes())
+ if (EVT(VT).isSimple() && VT != MVT::Other &&
+ TLI.isTypeLegal(EVT(VT)) &&
+ VT.getSizeInBits() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits();
+ }
+
+ void ConsiderForPruning(SDNode *N) {
+ // Mark this for potential pruning.
+ PruningList.insert(N);
+ }
+
+    /// Add to the worklist, making sure its instance is at the back (next to
+    /// be processed).
+ void AddToWorklist(SDNode *N) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Deleted Node added to Worklist");
+
+ // Skip handle nodes as they can't usefully be combined and confuse the
+ // zero-use deletion strategy.
+ if (N->getOpcode() == ISD::HANDLENODE)
+ return;
+
+ ConsiderForPruning(N);
+
+ if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
+ Worklist.push_back(N);
+ }
+
+ /// Remove all instances of N from the worklist.
+ void removeFromWorklist(SDNode *N) {
+ CombinedNodes.erase(N);
+ PruningList.remove(N);
+ StoreRootCountMap.erase(N);
+
+ auto It = WorklistMap.find(N);
+ if (It == WorklistMap.end())
+ return; // Not in the worklist.
+
+ // Null out the entry rather than erasing it to avoid a linear operation.
+ Worklist[It->second] = nullptr;
+ WorklistMap.erase(It);
+ }
+
+ void deleteAndRecombine(SDNode *N);
+ bool recursivelyDeleteUnusedNodes(SDNode *N);
+
+ /// Replaces all uses of the results of one DAG node with new values.
+ SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo = true);
+
+ /// Replaces all uses of the results of one DAG node with new values.
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ /// Replaces all uses of the results of one DAG node with new values.
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+ bool AddTo = true) {
+ SDValue To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+
+ void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+ private:
+ unsigned MaximumLegalStoreInBits;
+
+ /// Check the specified integer node value to see if it can be simplified or
+ /// if things it uses can be simplified by bit propagation.
+ /// If so, return true.
+ bool SimplifyDemandedBits(SDValue Op) {
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
+ APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
+ return SimplifyDemandedBits(Op, DemandedBits);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
+ }
+
+ /// Check the specified vector node value to see if it can be simplified or
+ /// if things it uses can be simplified as it only uses some of the
+ /// elements. If so, return true.
+ bool SimplifyDemandedVectorElts(SDValue Op) {
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return SimplifyDemandedVectorElts(Op, DemandedElts);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts);
+ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
+ bool AssumeSingleUse = false);
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+ SDValue SplitIndexingFromLoad(LoadSDNode *LD);
+ bool SliceUpLoad(SDNode *N);
+
+ // Scalars have size 0 to distinguish from singleton vectors.
+ SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
+ bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
+ bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
+
+ /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+ /// load.
+ ///
+ /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
+ /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
+ /// \param EltNo index of the vector element to load.
+ /// \param OriginalLoad load that EVE came from to be replaced.
+    /// \returns EVE on success, SDValue() on failure.
+ SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
+ SDValue EltNo,
+ LoadSDNode *OriginalLoad);
+ void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
+ SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
+ SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue PromoteIntBinOp(SDValue Op);
+ SDValue PromoteIntShiftOp(SDValue Op);
+ SDValue PromoteExtend(SDValue Op);
+ bool PromoteLoad(SDValue Op);
+
+ /// Call the node-specific routine that knows how to fold each
+ /// particular type of node. If that doesn't do anything, try the
+ /// target-specific DAG combines.
+ SDValue combine(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDValue.getNode() == 0 - No change was made
+ // SDValue.getNode() == N - N was replaced, is dead and has been handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
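+    // For example, a visit routine that rewrites a multi-result node with
+    // CombineTo returns SDValue(N, 0); Run() recognizes the node it passed in
+    // and skips its own replacement bookkeeping, as CombineTo has already
+    // handled the worklist mechanics.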
+ SDValue visitTokenFactor(SDNode *N);
+ SDValue visitMERGE_VALUES(SDNode *N);
+ SDValue visitADD(SDNode *N);
+ SDValue visitADDLike(SDNode *N);
+ SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitSUB(SDNode *N);
+ SDValue visitADDSAT(SDNode *N);
+ SDValue visitSUBSAT(SDNode *N);
+ SDValue visitADDC(SDNode *N);
+ SDValue visitADDO(SDNode *N);
+ SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitSUBC(SDNode *N);
+ SDValue visitSUBO(SDNode *N);
+ SDValue visitADDE(SDNode *N);
+ SDValue visitADDCARRY(SDNode *N);
+ SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
+ SDValue visitSUBE(SDNode *N);
+ SDValue visitSUBCARRY(SDNode *N);
+ SDValue visitMUL(SDNode *N);
+ SDValue visitMULFIX(SDNode *N);
+ SDValue useDivRem(SDNode *N);
+ SDValue visitSDIV(SDNode *N);
+ SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitUDIV(SDNode *N);
+ SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitREM(SDNode *N);
+ SDValue visitMULHU(SDNode *N);
+ SDValue visitMULHS(SDNode *N);
+ SDValue visitSMUL_LOHI(SDNode *N);
+ SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitMULO(SDNode *N);
+ SDValue visitIMINMAX(SDNode *N);
+ SDValue visitAND(SDNode *N);
+ SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitOR(SDNode *N);
+ SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N);
+ SDValue visitSHL(SDNode *N);
+ SDValue visitSRA(SDNode *N);
+ SDValue visitSRL(SDNode *N);
+ SDValue visitFunnelShift(SDNode *N);
+ SDValue visitRotate(SDNode *N);
+ SDValue visitABS(SDNode *N);
+ SDValue visitBSWAP(SDNode *N);
+ SDValue visitBITREVERSE(SDNode *N);
+ SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
+ SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
+ SDValue visitCTPOP(SDNode *N);
+ SDValue visitSELECT(SDNode *N);
+ SDValue visitVSELECT(SDNode *N);
+ SDValue visitSELECT_CC(SDNode *N);
+ SDValue visitSETCC(SDNode *N);
+ SDValue visitSETCCCARRY(SDNode *N);
+ SDValue visitSIGN_EXTEND(SDNode *N);
+ SDValue visitZERO_EXTEND(SDNode *N);
+ SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitAssertExt(SDNode *N);
+ SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
+ SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
+ SDValue visitTRUNCATE(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
+ SDValue visitBUILD_PAIR(SDNode *N);
+ SDValue visitFADD(SDNode *N);
+ SDValue visitFSUB(SDNode *N);
+ SDValue visitFMUL(SDNode *N);
+ SDValue visitFMA(SDNode *N);
+ SDValue visitFDIV(SDNode *N);
+ SDValue visitFREM(SDNode *N);
+ SDValue visitFSQRT(SDNode *N);
+ SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitFPOW(SDNode *N);
+ SDValue visitSINT_TO_FP(SDNode *N);
+ SDValue visitUINT_TO_FP(SDNode *N);
+ SDValue visitFP_TO_SINT(SDNode *N);
+ SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitFP_ROUND(SDNode *N);
+ SDValue visitFP_EXTEND(SDNode *N);
+ SDValue visitFNEG(SDNode *N);
+ SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
+ SDValue visitFMINNUM(SDNode *N);
+ SDValue visitFMAXNUM(SDNode *N);
+ SDValue visitFMINIMUM(SDNode *N);
+ SDValue visitFMAXIMUM(SDNode *N);
+ SDValue visitBRCOND(SDNode *N);
+ SDValue visitBR_CC(SDNode *N);
+ SDValue visitLOAD(SDNode *N);
+
+ SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
+ SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+
+ SDValue visitSTORE(SDNode *N);
+ SDValue visitLIFETIME_END(SDNode *N);
+ SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+ SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue visitBUILD_VECTOR(SDNode *N);
+ SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
+ SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitSCALAR_TO_VECTOR(SDNode *N);
+ SDValue visitINSERT_SUBVECTOR(SDNode *N);
+ SDValue visitMLOAD(SDNode *N);
+ SDValue visitMSTORE(SDNode *N);
+ SDValue visitMGATHER(SDNode *N);
+ SDValue visitMSCATTER(SDNode *N);
+ SDValue visitFP_TO_FP16(SDNode *N);
+ SDValue visitFP16_TO_FP(SDNode *N);
+ SDValue visitVECREDUCE(SDNode *N);
+
+ SDValue visitFADDForFMACombine(SDNode *N);
+ SDValue visitFSUBForFMACombine(SDNode *N);
+ SDValue visitFMULForFMADistributiveCombine(SDNode *N);
+
+ SDValue XformToShuffleWithZero(SDNode *N);
+ bool reassociationCanBreakAddressingModePattern(unsigned Opc,
+ const SDLoc &DL, SDValue N0,
+ SDValue N1);
+ SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1);
+ SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1, SDNodeFlags Flags);
+
+ SDValue visitShiftByConstant(SDNode *N);
+
+ SDValue foldSelectOfConstants(SDNode *N);
+ SDValue foldVSelectOfConstants(SDNode *N);
+ SDValue foldBinOpIntoSelect(SDNode *BO);
+ bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+ SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
+ SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDValue convertSelectOfFPConstantsToLoadOffset(
+ const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC);
+ SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC);
+ SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL);
+ SDValue unfoldMaskedMerge(SDNode *N);
+ SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ const SDLoc &DL, bool foldBooleans);
+ SDValue rebuildSetCC(SDValue N);
+
+ bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) const;
+ bool isOneUseSetCC(SDValue N) const;
+ bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
+
+ SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp);
+ SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue CombineExtLoad(SDNode *N);
+ SDValue CombineZExtLogicopShiftLoad(SDNode *N);
+ SDValue combineRepeatedFPDivisors(SDNode *N);
+ SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue BuildSDIV(SDNode *N);
+ SDValue BuildSDIVPow2(SDNode *N);
+ SDValue BuildUDIV(SDNode *N);
+ SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
+ SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
+ SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
+ SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
+ SDNodeFlags Flags, bool Reciprocal);
+ SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
+ SDNodeFlags Flags, bool Reciprocal);
+ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits = true);
+ SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+ SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
+ SDValue InnerPos, SDValue InnerNeg,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL);
+ SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+ SDValue MatchLoadCombine(SDNode *N);
+ SDValue MatchStoreCombine(StoreSDNode *N);
+ SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue splitMergedValStore(StoreSDNode *ST);
+ SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue convertBuildVecZextToZext(SDNode *N);
+ SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+ SDValue reduceBuildVecToShuffle(SDNode *N);
+ SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
+ ArrayRef<int> VectorMask, SDValue VecIn1,
+ SDValue VecIn2, unsigned LeftIdx,
+ bool DidSplitVec);
+ SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
+
+ /// Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVectorImpl<SDValue> &Aliases);
+
+ /// Return true if there is any possibility that the two addresses overlap.
+ bool isAlias(SDNode *Op0, SDNode *Op1) const;
+
+ /// Walk up chain skipping non-aliasing memory nodes, looking for a better
+    /// chain (aliasing node).
+ SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+ /// Try to replace a store and any possibly adjacent stores on
+ /// consecutive chains with better chains. Return true only if St is
+ /// replaced.
+ ///
+ /// Notice that other chains may still be replaced even if the function
+ /// returns false.
+ bool findBetterNeighborChains(StoreSDNode *St);
+
+    // Helper for findBetterNeighborChains. Walks up the store chain, adding
+    // additional chained stores that do not overlap and can be parallelized.
+ bool parallelizeChainedStores(StoreSDNode *St);
+
+ /// Holds a pointer to an LSBaseSDNode as well as information on where it
+ /// is located in a sequence of memory operations connected by a chain.
+ struct MemOpLink {
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+
+ MemOpLink(LSBaseSDNode *N, int64_t Offset)
+ : MemNode(N), OffsetFromBase(Offset) {}
+ };
+
+ /// This is a helper function for visitMUL to check the profitability
+ /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+ /// MulNode is the original multiply, AddNode is (add x, c1),
+ /// and ConstNode is c2.
+ bool isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode);
+
+ /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
+ /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
+ /// the type of the loaded value to be extended.
+ bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT);
+
+ /// Helper function to calculate whether the given Load/Store can have its
+    /// width reduced to MemVT.
+ bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
+ EVT &MemVT, unsigned ShAmt = 0);
+
+ /// Used by BackwardsPropagateMask to find suitable loads.
+ bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
+ SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+ ConstantSDNode *Mask, SDNode *&NodeToMask);
+ /// Attempt to propagate a given AND node back to load leaves so that they
+ /// can be combined into narrow loads.
+ bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
+
+ /// Helper function for MergeConsecutiveStores which merges the
+ /// component store chains.
+ SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores);
+
+ /// This is a helper function for MergeConsecutiveStores. When the
+ /// source elements of the consecutive stores are all constants or
+ /// all extracted vector elements, try to merge them into one
+ /// larger store introducing bitcasts if necessary. \return True
+ /// if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector,
+ bool UseTrunc);
+
+ /// This is a helper function for MergeConsecutiveStores. Stores
+ /// that potentially may be merged with St are placed in
+ /// StoreNodes. RootNode is a chain predecessor to all store
+ /// candidates.
+ void getStoreMergeCandidates(StoreSDNode *St,
+ SmallVectorImpl<MemOpLink> &StoreNodes,
+ SDNode *&Root);
+
+ /// Helper function for MergeConsecutiveStores. Checks if
+ /// candidate stores have indirect dependency through their
+ /// operands. RootNode is the predecessor to all stores calculated
+ /// by getStoreMergeCandidates and is used to prune the dependency check.
+ /// \return True if safe to merge.
+ bool checkMergeStoreCandidatesForDependencies(
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
+ SDNode *RootNode);
+
+ /// Merge consecutive store operations into a wide store.
+ /// This optimization uses wide integers or vectors when possible.
+    /// \return true if any stores were merged.
+ bool MergeConsecutiveStores(StoreSDNode *St);
+
+ /// Try to transform a truncation where C is a constant:
+ /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
+ ///
+ /// \p N needs to be a truncation and its first operand an AND. Other
+ /// requirements are checked by the function (e.g. that trunc is
+ /// single-use) and if missed an empty SDValue is returned.
+ SDValue distributeTruncateThroughAnd(SDNode *N);
+
+ /// Helper function to determine whether the target supports operation
+ /// given by \p Opcode for type \p VT, that is, whether the operation
+    /// is legal or custom before legalizing operations, and whether it is
+ /// legal (but not custom) after legalization.
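+    /// For example, a combine that wants to form a rotate can first check
+    /// hasOperation(ISD::ROTL, VT) so that it only fires when the target can
+    /// select the resulting node at the current legalization stage.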
+ bool hasOperation(unsigned Opcode, EVT VT) {
+ if (LegalOperations)
+ return TLI.isOperationLegal(Opcode, VT);
+ return TLI.isOperationLegalOrCustom(Opcode, VT);
+ }
+
+ public:
+ /// Runs the dag combiner on all nodes in the work list
+ void Run(CombineLevel AtLevel);
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+ /// Returns a type large enough to hold any valid shift amount - before type
+ /// legalization these can be huge.
+ EVT getShiftAmountTy(EVT LHSTy) {
+ assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+ return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
+ }
+
+ /// This method returns true if we are running before type legalization or
+ /// if the specified VT is legal.
+ bool isTypeLegal(const EVT &VT) {
+ if (!LegalTypes) return true;
+ return TLI.isTypeLegal(VT);
+ }
+
+ /// Convenience wrapper around TargetLowering::getSetCCResultType
+ EVT getSetCCResultType(EVT VT) const {
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ }
+
+ void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
+ SDValue OrigLoad, SDValue ExtLoad,
+ ISD::NodeType ExtType);
+ };
+
+/// This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class WorklistRemover : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+
+public:
+ explicit WorklistRemover(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ DC.removeFromWorklist(N);
+ }
+};
+
+class WorklistInserter : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+
+public:
+ explicit WorklistInserter(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+ // FIXME: Ideally we could add N to the worklist, but this causes exponential
+ // compile time costs in large DAGs, e.g. Halide.
+ void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorklist(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+bool TargetLowering::DAGCombinerInfo::
+recursivelyDeleteUnusedNodes(SDNode *N) {
+ return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::deleteAndRecombine(SDNode *N) {
+ removeFromWorklist(N);
+
+ // If the operands of this node are only used by the node, they will now be
+ // dead. Make sure to re-visit them and recursively delete dead nodes.
+ for (const SDValue &Op : N->ops())
+ // For an operand generating multiple values, one of the values may
+ // become dead allowing further simplification (e.g. split index
+ // arithmetic from an indexed load).
+ if (Op->hasOneUse() || Op->getNumValues() > 1)
+ AddToWorklist(Op.getNode());
+
+ DAG.DeleteNode(N);
+}
+
+// APInts must be the same size for most operations; this helper function
+// zero extends the shorter of the pair so that they match.
+// We provide an Offset so that we can create bitwidths that won't overflow.
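+// For example, an 8-bit LHS and a 16-bit RHS with Offset == 1 are both
+// widened to 17 bits, leaving one bit of headroom for a carry.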
+static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
+ unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
+ LHS = LHS.zextOrSelf(Bits);
+ RHS = RHS.zextOrSelf(Bits);
+}
+
+// Return true if this node is a setcc, or is a select_cc
+// that selects between the target values used for true and false, making it
+// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
+// the appropriate nodes based on the type of node we are checking. This
+// simplifies life a bit for the callers.
+bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) const {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::SELECT_CC ||
+ !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
+ !TLI.isConstFalseVal(N.getOperand(3).getNode()))
+ return false;
+
+ if (TLI.getBooleanContents(N.getValueType()) ==
+ TargetLowering::UndefinedBooleanContent)
+ return false;
+
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+}
+
+/// Return true if this is a SetCC-equivalent operation with only one use.
+/// If this is true, it allows the users to invert the operation for free when
+/// it is profitable to do so.
+bool DAGCombiner::isOneUseSetCC(SDValue N) const {
+ SDValue N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ return true;
+ return false;
+}
+
+// Returns the SDNode if it is a constant float BuildVector
+// or constant float.
+static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
+ if (isa<ConstantFPSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+ return N.getNode();
+ return nullptr;
+}
+
+// Determines if it is a constant integer or a build vector of constant
+// integers (and undefs).
+// Do not permit build vector implicit truncation.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
+ return !(Const->isOpaque() && NoOpaques);
+ if (N.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
+ if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
+ (Const->isOpaque() && NoOpaques))
+ return false;
+ }
+ return true;
+}
+
+// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
+// with undefs.
+static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ return isConstantOrConstantVector(V, NoOpaques) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
+}
+
+bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
+ const SDLoc &DL,
+ SDValue N0,
+ SDValue N1) {
+ // Currently this only tries to ensure we don't undo the GEP splits done by
+ // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
+ // we check if the following transformation would be problematic:
+ // (load/store (add, (add, x, offset1), offset2)) ->
+ // (load/store (add, x, offset1+offset2)).
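+  // For example, if a target can only fold immediate offsets up to 4095 into
+  // its addressing modes, rewriting (add (add x, 4000), 2000) into
+  // (add x, 6000) would force the combined offset to be materialized in a
+  // register instead of being folded into the load or store.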
+
+ if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
+ return false;
+
+ if (N0.hasOneUse())
+ return false;
+
+ auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ auto *C2 = dyn_cast<ConstantSDNode>(N1);
+ if (!C1 || !C2)
+ return false;
+
+ const APInt &C1APIntVal = C1->getAPIntValue();
+ const APInt &C2APIntVal = C2->getAPIntValue();
+ if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+ return false;
+
+ const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+ if (CombinedValueIntVal.getBitWidth() > 64)
+ return false;
+ const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+ for (SDNode *Node : N0->uses()) {
+    if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ EVT VT = LoadStore->getMemoryVT();
+ unsigned AS = LoadStore->getAddressSpace();
+ Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
+// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
+SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
+ SDValue N0, SDValue N1) {
+ EVT VT = N0.getValueType();
+
+ if (N0.getOpcode() != Opc)
+ return SDValue();
+
+ // Don't reassociate reductions.
+ if (N0->getFlags().hasVectorReduction())
+ return SDValue();
+
+ if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
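+      // e.g. (add (add x, 3), 5) -> (add x, 8); the two constants fold into
+      // one at compile time.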
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ return SDValue();
+ }
+ if (N0.hasOneUse()) {
+ // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+ // iff (op x, c1) has one use
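+      // e.g. (add (add x, 3), y) -> (add (add x, y), 3), hoisting the
+      // constant outward where the pattern above can later fold it with
+      // another constant.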
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
+ if (!OpNode.getNode())
+ return SDValue();
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ }
+ }
+ return SDValue();
+}
+
+// Try to reassociate commutative binops.
+SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1, SDNodeFlags Flags) {
+ assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
+ // Don't reassociate reductions.
+ if (Flags.hasVectorReduction())
+ return SDValue();
+
+ // Floating-point reassociation is not allowed without loose FP math.
+ if (N0.getValueType().isFloatingPoint() ||
+ N1.getValueType().isFloatingPoint())
+ if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
+ return SDValue();
+
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
+ return Combined;
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
+ return Combined;
+ return SDValue();
+}
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ dbgs() << " and " << NumTo - 1 << " other values\n");
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
+ "Cannot combine value to value of different type!");
+
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesWith(N, To);
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ if (To[i].getNode()) {
+ AddToWorklist(To[i].getNode());
+ AddUsersToWorklist(To[i].getNode());
+ }
+ }
+ }
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty())
+ deleteAndRecombine(N);
+ return SDValue(N, 0);
+}
+
+void DAGCombiner::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ // Replace all uses. If any nodes become isomorphic to other nodes and
+ // are deleted, make sure to remove them from our worklist.
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorklist(TLO.New.getNode());
+ AddUsersToWorklist(TLO.New.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (TLO.Old.getNode()->use_empty())
+ deleteAndRecombine(TLO.Old.getNode());
+}
+
+/// Check the specified integer node value to see if it can be simplified or if
+/// things it uses can be simplified by bit propagation. If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ KnownBits Known;
+ if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
+ return false;
+
+ // Revisit the node.
+ AddToWorklist(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+/// Check the specified vector node value to see if it can be simplified or
+/// if things it uses can be simplified as it only uses some of the elements.
+/// If so, return true.
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
+ const APInt &DemandedElts,
+ bool AssumeSingleUse) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ APInt KnownUndef, KnownZero;
+ if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
+ TLO, 0, AssumeSingleUse))
+ return false;
+
+ // Revisit the node.
+ AddToWorklist(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
+ SDLoc DL(Load);
+ EVT VT = Load->getValueType(0);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
+
+ LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
+ Trunc.getNode()->dump(&DAG); dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
+ deleteAndRecombine(Load);
+ AddToWorklist(Trunc.getNode());
+}
+
+SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
+ Replace = false;
+ SDLoc DL(Op);
+ if (ISD::isUNINDEXEDLoad(Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
+ : LD->getExtensionType();
+ Replace = true;
+ return DAG.getExtLoad(ExtType, DL, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ MemVT, LD->getMemOperand());
+ }
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default: break;
+ case ISD::AssertSext:
+ if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
+ return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
+ break;
+ case ISD::AssertZext:
+ if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
+ return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
+ break;
+ case ISD::Constant: {
+ unsigned ExtOpc =
+ Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOpc, DL, PVT, Op);
+ }
+ }
+
+ if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
+ return SDValue();
+ return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
+}
+
+SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
+ if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
+ return SDValue();
+ EVT OldVT = Op.getValueType();
+ SDLoc DL(Op);
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (!NewOp.getNode())
+ return SDValue();
+ AddToWorklist(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
+ DAG.getValueType(OldVT));
+}
+
+SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
+ EVT OldVT = Op.getValueType();
+ SDLoc DL(Op);
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (!NewOp.getNode())
+ return SDValue();
+ AddToWorklist(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
+}
+
+/// Promote the specified integer binary operation if the target indicates it is
+/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+
+ bool Replace0 = false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
+
+ bool Replace1 = false;
+ SDValue N1 = Op.getOperand(1);
+ SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
+ SDLoc DL(Op);
+
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
+
+ // We are always replacing N0/N1's use in N and only need
+ // additional replacements if there are additional uses.
+ Replace0 &= !N0->hasOneUse();
+ Replace1 &= (N0 != N1) && !N1->hasOneUse();
+
+ // Combine Op here so it is preserved past replacements.
+ CombineTo(Op.getNode(), RV);
+
+ // If operands have a use ordering, make sure we deal with
+ // predecessor first.
+ if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
+ std::swap(N0, N1);
+ std::swap(NN0, NN1);
+ }
+
+ if (Replace0) {
+ AddToWorklist(NN0.getNode());
+ ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
+ }
+ if (Replace1) {
+ AddToWorklist(NN1.getNode());
+ ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+ }
+ return Op;
+ }
+ return SDValue();
+}
+
+/// Promote the specified integer shift operation if the target indicates it is
+/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+
+ bool Replace = false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ if (Opc == ISD::SRA)
+ N0 = SExtPromoteOperand(N0, PVT);
+ else if (Opc == ISD::SRL)
+ N0 = ZExtPromoteOperand(N0, PVT);
+ else
+ N0 = PromoteOperand(N0, PVT, Replace);
+
+ if (!N0.getNode())
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
+
+ // Deal with Op being deleted.
+ if (Op && Op.getOpcode() != ISD::DELETED_NODE)
+ return RV;
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::PromoteExtend(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
+ }
+ return SDValue();
+}
+
+bool DAGCombiner::PromoteLoad(SDValue Op) {
+ if (!LegalOperations)
+ return false;
+
+ if (!ISD::isUNINDEXEDLoad(Op.getNode()))
+ return false;
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return false;
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return false;
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ SDLoc DL(Op);
+ SDNode *N = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
+ : LD->getExtensionType();
+ SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ MemVT, LD->getMemOperand());
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
+
+ LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
+ Result.getNode()->dump(&DAG); dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
+ deleteAndRecombine(N);
+ AddToWorklist(Result.getNode());
+ return true;
+ }
+ return false;
+}
+
+/// Recursively delete a node which has no uses and any operands for
+/// which it is the only use.
+///
+/// Note that this both deletes the nodes and removes them from the worklist.
+/// It also adds any nodes that have had a user deleted to the worklist, as
+/// they may now have only one use and be subject to other combines.
+bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
+ if (!N->use_empty())
+ return false;
+
+ SmallSetVector<SDNode *, 16> Nodes;
+ Nodes.insert(N);
+ do {
+ N = Nodes.pop_back_val();
+ if (!N)
+ continue;
+
+ if (N->use_empty()) {
+ for (const SDValue &ChildN : N->op_values())
+ Nodes.insert(ChildN.getNode());
+
+ removeFromWorklist(N);
+ DAG.DeleteNode(N);
+ } else {
+ AddToWorklist(N);
+ }
+ } while (!Nodes.empty());
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+  // Set the instance variables so that the various visit routines may use them.
+ Level = AtLevel;
+ LegalOperations = Level >= AfterLegalizeVectorOps;
+ LegalTypes = Level >= AfterLegalizeTypes;
+
+ WorklistInserter AddNodes(*this);
+
+ // Add all the dag nodes to the worklist.
+ for (SDNode &Node : DAG.allnodes())
+ AddToWorklist(&Node);
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // While we have a valid worklist entry node, try to combine it.
+ while (SDNode *N = getNextWorklistEntry()) {
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (recursivelyDeleteUnusedNodes(N))
+ continue;
+
+ WorklistRemover DeadNodes(*this);
+
+ // If this combine is running after legalizing the DAG, re-legalize any
+ // nodes pulled off the worklist.
+ if (Level == AfterLegalizeDAG) {
+ SmallSetVector<SDNode *, 16> UpdatedNodes;
+ bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
+
+ for (SDNode *LN : UpdatedNodes) {
+ AddUsersToWorklist(LN);
+ AddToWorklist(LN);
+ }
+ if (!NIsValid)
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
+
+ // Add any operands of the new node which have not yet been combined to the
+ // worklist as well. Because the worklist uniques things already, this
+ // won't repeatedly process the same operand.
+ CombinedNodes.insert(N);
+ for (const SDValue &ChildN : N->op_values())
+ if (!CombinedNodes.count(ChildN.getNode()))
+ AddToWorklist(ChildN.getNode());
+
+ SDValue RV = combine(N);
+
+ if (!RV.getNode())
+ continue;
+
+ ++NodesCombined;
+
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.getNode() == N)
+ continue;
+
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
+
+ if (N->getNumValues() == RV.getNode()->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.getNode());
+ else {
+ assert(N->getValueType(0) == RV.getValueType() &&
+ N->getNumValues() == 1 && "Type mismatch");
+ DAG.ReplaceAllUsesWith(N, &RV);
+ }
+
+ // Push the new node and any users onto the worklist
+ AddToWorklist(RV.getNode());
+ AddUsersToWorklist(RV.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node. This will also take care of adding any
+ // operands which have lost a user to the worklist.
+ recursivelyDeleteUnusedNodes(N);
+ }
+
+  // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+ DAG.RemoveDeadNodes();
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::SADDSAT:
+ case ISD::UADDSAT: return visitADDSAT(N);
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: return visitSUBSAT(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::SADDO:
+ case ISD::UADDO: return visitADDO(N);
+ case ISD::SUBC: return visitSUBC(N);
+ case ISD::SSUBO:
+ case ISD::USUBO: return visitSUBO(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::ADDCARRY: return visitADDCARRY(N);
+ case ISD::SUBE: return visitSUBE(N);
+ case ISD::SUBCARRY: return visitSUBCARRY(N);
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: return visitMULFIX(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM:
+ case ISD::UREM: return visitREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
+ case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SMULO:
+ case ISD::UMULO: return visitMULO(N);
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: return visitIMINMAX(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::ROTR:
+ case ISD::ROTL: return visitRotate(N);
+ case ISD::FSHL:
+ case ISD::FSHR: return visitFunnelShift(N);
+ case ISD::ABS: return visitABS(N);
+ case ISD::BSWAP: return visitBSWAP(N);
+ case ISD::BITREVERSE: return visitBITREVERSE(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::VSELECT: return visitVSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SETCCCARRY: return visitSETCCCARRY(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::AssertSext:
+ case ISD::AssertZext: return visitAssertExt(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BITCAST: return visitBITCAST(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FMA: return visitFMA(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FSQRT: return visitFSQRT(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::FPOW: return visitFPOW(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FMINNUM: return visitFMINNUM(N);
+ case ISD::FMAXNUM: return visitFMAXNUM(N);
+ case ISD::FMINIMUM: return visitFMINIMUM(N);
+ case ISD::FMAXIMUM: return visitFMAXIMUM(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
+ case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
+ case ISD::MGATHER: return visitMGATHER(N);
+ case ISD::MLOAD: return visitMLOAD(N);
+ case ISD::MSCATTER: return visitMSCATTER(N);
+ case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::LIFETIME_END: return visitLIFETIME_END(N);
+ case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
+ case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::combine(SDNode *N) {
+ SDValue RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (!RV.getNode()) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+ // Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level, false, this);
+
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+ }
+
+ // If nothing happened still, try promoting the operation.
+ if (!RV.getNode()) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ RV = PromoteIntBinOp(SDValue(N, 0));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ RV = PromoteIntShiftOp(SDValue(N, 0));
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ RV = PromoteExtend(SDValue(N, 0));
+ break;
+ case ISD::LOAD:
+ if (PromoteLoad(SDValue(N, 0)))
+ RV = SDValue(N, 0);
+ break;
+ }
+ }
+
+ // If N is a commutative binary node, try to eliminate it if the commuted
+ // version is already present in the DAG.
+ if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
+ N->getNumValues() == 1) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Constant operands are canonicalized to RHS.
+ if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
+ SDValue Ops[] = {N1, N0};
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ N->getFlags());
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+ }
+
+ return RV;
+}
+
+/// Given a node, return its input chain if it has one, otherwise return a
+/// null SDValue.
+static SDValue getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
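+    // The chain, if present, is commonly operand 0 or the last operand, so
+    // check those positions before scanning the middle ones.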
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+ return N->getOperand(1);
+ }
+
+ // Don't simplify token factors if optnone.
+ if (OptLevel == CodeGenOpt::None)
+ return SDValue();
+
+ // If the sole user is a token factor, we should make sure we have a
+ // chance to merge them together. This prevents TF chains from inhibiting
+ // optimizations.
+ if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
+ AddToWorklist(*(N->use_begin()));
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+  // Iterate through token factors. The TFs list grows as new token factors
+  // are encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ // Limit number of nodes to inline, to avoid quadratic compile times.
+ // We have to add the outstanding Token Factors to Ops, otherwise we might
+ // drop Ops from the resulting Token Factors.
+ if (Ops.size() > TokenFactorInlineLimit) {
+ for (unsigned j = i; j < TFs.size(); j++)
+ Ops.emplace_back(TFs[j], 0);
+ // Drop unprocessed Token Factors from TFs, so we do not add them to the
+ // combiner worklist later.
+ TFs.resize(i);
+ break;
+ }
+
+ SDNode *TF = TFs[i];
+ // Check each of the operands.
+ for (const SDValue &Op : TF->op_values()) {
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+ // redundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
+ // Queue up for processing.
+ TFs.push_back(Op.getNode());
+ Changed = true;
+ break;
+ }
+ LLVM_FALLTHROUGH;
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.getNode()).second)
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Re-visit inlined Token Factors, to clean them up in case they have been
+ // removed. Skip the first Token Factor, as this is the current node.
+ for (unsigned i = 1, e = TFs.size(); i < e; i++)
+ AddToWorklist(TFs[i]);
+
+  // Remove Nodes that are chained to another node in the list. Do so by
+  // walking up chains breadth-first, stopping when we've seen another operand.
+  // In general we must climb to the EntryNode, but we can exit early if we
+  // find all remaining work is associated with just one operand as no further
+  // pruning is possible.
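+  //
+  // For example (a sketch): in TokenFactor(Ld, St), if walking up St's chain
+  // reaches Ld, then St already orders after Ld and Ld can be pruned from the
+  // operand list.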
+
+ // List of nodes to search through and original Ops from which they originate.
+ SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
+ SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
+ SmallPtrSet<SDNode *, 16> SeenChains;
+ bool DidPruneOps = false;
+
+ unsigned NumLeftToConsider = 0;
+ for (const SDValue &Op : Ops) {
+ Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
+ OpWorkCount.push_back(1);
+ }
+
+ auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
+    // If this is an Op, we can remove the op from the list. Re-mark any
+    // search associated with it as coming from the current OpNumber.
+ if (SeenOps.count(Op) != 0) {
+ Changed = true;
+ DidPruneOps = true;
+ unsigned OrigOpNumber = 0;
+ while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
+ OrigOpNumber++;
+ assert((OrigOpNumber != Ops.size()) &&
+ "expected to find TokenFactor Operand");
+ // Re-mark worklist from OrigOpNumber to OpNumber
+ for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
+ if (Worklist[i].second == OrigOpNumber) {
+ Worklist[i].second = OpNumber;
+ }
+ }
+ OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
+ OpWorkCount[OrigOpNumber] = 0;
+ NumLeftToConsider--;
+ }
+ // Add if it's a new chain
+ if (SeenChains.insert(Op).second) {
+ OpWorkCount[OpNumber]++;
+ Worklist.push_back(std::make_pair(Op, OpNumber));
+ }
+ };
+
+ for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
+    // We need to consider at least 2 Ops to prune.
+ if (NumLeftToConsider <= 1)
+ break;
+ auto CurNode = Worklist[i].first;
+ auto CurOpNumber = Worklist[i].second;
+ assert((OpWorkCount[CurOpNumber] > 0) &&
+ "Node should not appear in worklist");
+ switch (CurNode->getOpcode()) {
+ case ISD::EntryToken:
+      // Hitting EntryToken is the only way for the search to terminate
+      // without hitting another operand's search. Prevent us from marking
+      // this operand considered.
+ NumLeftToConsider++;
+ break;
+ case ISD::TokenFactor:
+ for (const SDValue &Op : CurNode->op_values())
+ AddToWorklist(i, Op.getNode(), CurOpNumber);
+ break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
+ break;
+ default:
+ if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
+ AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
+ break;
+ }
+ OpWorkCount[CurOpNumber]--;
+ if (OpWorkCount[CurOpNumber] == 0)
+ NumLeftToConsider--;
+ }
+
+ // If we've changed things around then replace token factor.
+ if (Changed) {
+ SDValue Result;
+ if (Ops.empty()) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ if (DidPruneOps) {
+ SmallVector<SDValue, 8> PrunedOps;
+        // Keep only the ops that were not reached while walking up another
+        // op's chain; any op that was reached is already ordered by it.
+ for (const SDValue &Op : Ops) {
+ if (SeenChains.count(Op.getNode()) == 0)
+ PrunedOps.push_back(Op);
+ }
+ Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
+ } else {
+ Result = DAG.getTokenFactor(SDLoc(N), Ops);
+ }
+ }
+ return Result;
+ }
+ return SDValue();
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+ WorklistRemover DeadNodes(*this);
+ // Replacing results may cause a different MERGE_VALUES to suddenly
+ // be CSE'd with N, and carry its uses with it. Iterate until no
+ // uses remain, to ensure that the node can be safely deleted.
+ // First add the users of this node to the work list so that they
+ // can be tried again once they have new operands.
+ AddUsersToWorklist(N);
+ do {
+ // Do as a single replacement to avoid rewalking use lists.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+ DAG.ReplaceAllUsesWith(N, Ops.data());
+ } while (!N->use_empty());
+ deleteAndRecombine(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
+/// ConstantSDNode pointer; else return nullptr.
+static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
+ return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
+}
+
+SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
+ assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
+ "Unexpected binary operator");
+
+ // Don't do this unless the old select is going away. We want to eliminate the
+ // binary operator, not replace a binop with a select.
+ // TODO: Handle ISD::SELECT_CC.
+ unsigned SelOpNo = 0;
+ SDValue Sel = BO->getOperand(0);
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
+ SelOpNo = 1;
+ Sel = BO->getOperand(1);
+ }
+
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
+ return SDValue();
+
+ SDValue CT = Sel.getOperand(1);
+ if (!isConstantOrConstantVector(CT, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CT))
+ return SDValue();
+
+ SDValue CF = Sel.getOperand(2);
+ if (!isConstantOrConstantVector(CF, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CF))
+ return SDValue();
+
+  // Bail out if any constants are opaque because we can't constant fold those.
+  // The exception is "and" and "or" with either 0 or -1, in which case we can
+  // propagate non-constant operands into the select. I.e.:
+ // and (select Cond, 0, -1), X --> select Cond, 0, X
+ // or X, (select Cond, -1, 0) --> select Cond, -1, X
+ auto BinOpcode = BO->getOpcode();
+ bool CanFoldNonConst =
+ (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+ (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
+ (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
+
+ SDValue CBO = BO->getOperand(SelOpNo ^ 1);
+ if (!CanFoldNonConst &&
+ !isConstantOrConstantVector(CBO, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CBO))
+ return SDValue();
+
+ EVT VT = Sel.getValueType();
+
+  // In the case of a shift, the value and the shift amount may have different
+  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
+  // type. Bail out if we have swapped operands and the value types do not
+  // match. NB: x86 is fine if the operands are not swapped and the shift
+  // amount VT is no bigger than that of the shifted value.
+  // TODO: it is possible to check for a shift operation, correct the VTs, and
+  // still perform the optimization on x86 if needed.
+ if (SelOpNo && VT != CBO.getValueType())
+ return SDValue();
+
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the select.
+ // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
+ SDLoc DL(Sel);
+ SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
+ : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
+ if (!CanFoldNonConst && !NewCT.isUndef() &&
+ !isConstantOrConstantVector(NewCT, true) &&
+ !isConstantFPBuildVectorOrConstantFP(NewCT))
+ return SDValue();
+
+ SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
+ : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
+ if (!CanFoldNonConst && !NewCF.isUndef() &&
+ !isConstantOrConstantVector(NewCF, true) &&
+ !isConstantFPBuildVectorOrConstantFP(NewCF))
+ return SDValue();
+
+ SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+ SelectOp->setFlags(BO->getFlags());
+ return SelectOp;
+}
+
+static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Expecting add or sub");
+
+ // Match a constant operand and a zext operand for the math instruction:
+ // add Z, C
+ // sub C, Z
+ bool IsAdd = N->getOpcode() == ISD::ADD;
+ SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
+ SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
+ auto *CN = dyn_cast<ConstantSDNode>(C);
+ if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+
+ // Match the zext operand as a setcc of a boolean.
+ if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
+ Z.getOperand(0).getValueType() != MVT::i1)
+ return SDValue();
+
+ // Match the compare as: setcc (X & 1), 0, eq.
+ SDValue SetCC = Z.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
+ SetCC.getOperand(0).getOpcode() != ISD::AND ||
+ !isOneConstant(SetCC.getOperand(0).getOperand(1)))
+ return SDValue();
+
+ // We are adding/subtracting a constant and an inverted low bit. Turn that
+ // into a subtract/add of the low bit with incremented/decremented constant:
+ // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
+ // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
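+  // For example (a sketch, i32): add (zext (seteq (and X, 1), 0)), 41
+  //   --> sub 42, (zext (and X, 1))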
+ EVT VT = C.getValueType();
+ SDLoc DL(N);
+ SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
+ SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
+ DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
+}
+
+/// Try to fold a shift of a 'not'-ed sign-bit with an add/sub of a constant
+/// operand into a shift and add with a different constant.
+static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Expecting add or sub");
+
+ // We need a constant operand for the add/sub, and the other operand is a
+ // logical shift right: add (srl), C or sub C, (srl).
+ // TODO - support non-uniform vector amounts.
+ bool IsAdd = N->getOpcode() == ISD::ADD;
+ SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
+ SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
+ ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
+ if (!C || ShiftOp.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // The shift must be of a 'not' value.
+ SDValue Not = ShiftOp.getOperand(0);
+ if (!Not.hasOneUse() || !isBitwiseNot(Not))
+ return SDValue();
+
+ // The shift must be moving the sign bit to the least-significant-bit.
+ EVT VT = ShiftOp.getValueType();
+ SDValue ShAmt = ShiftOp.getOperand(1);
+ ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+ if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
+ return SDValue();
+
+ // Eliminate the 'not' by adjusting the shift and add/sub constant:
+ // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
+ // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
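+  // Sanity check for the first fold (i32, X negative): srl (not X), 31 == 0,
+  // so the original form gives C; sra X, 31 == -1, so the new form gives
+  // (C + 1) - 1 == C as well.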
+ SDLoc DL(N);
+ auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
+ SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
+ APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
+ return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
+}
+
+/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
+/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
+/// are no common bits set in the operands).
+SDValue DAGCombiner::visitADDLike(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (add x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ }
+
+ // fold (add x, undef) -> undef
+ if (N0.isUndef())
+ return N0;
+
+ if (N1.isUndef())
+ return N1;
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ // canonicalize constant to RHS
+ if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+ // fold (add c1, c2) -> c1+c2
+ return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
+ N1.getNode());
+ }
+
+ // fold (add x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
+ // fold ((A-c1)+c2) -> (A+(c2-c1))
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
+ SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
+ N0.getOperand(1).getNode());
+ assert(Sub && "Constant folding failed");
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
+ }
+
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
+ SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
+ N0.getOperand(0).getNode());
+ assert(Add && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+ }
+
+ // add (sext i1 X), 1 -> zext (not i1 X)
+ // We don't transform this pattern:
+ // add (zext i1 X), -1 -> sext (not i1 X)
+ // because most (?) targets generate better code for the zext form.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+ isOneOrOneSplat(N1)) {
+ SDValue X = N0.getOperand(0);
+ if ((!LegalOperations ||
+ (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
+ X.getScalarValueSizeInBits() == 1) {
+ SDValue Not = DAG.getNOT(DL, X, X.getValueType());
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
+ }
+ }
+
+ // Undo the add -> or combine to merge constant offsets from a frame index.
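+    // e.g. (a sketch): add (or FI, 4), 16 --> add FI, (16 + 4), provided FI
+    // and 4 share no common bits.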
+ if (N0.getOpcode() == ISD::OR &&
+ isa<FrameIndexSDNode>(N0.getOperand(0)) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
+ SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
+ }
+ }
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // reassociate add
+ if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
+ if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+ return RADD;
+ }
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
+
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+
+ // fold ((B-A)+A) -> B
+ if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+ return N0.getOperand(0);
+
+ // fold ((A-B)+(C-A)) -> (C-B)
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+ N0.getOperand(1));
+
+ // fold ((A-B)+(B-C)) -> (A-C)
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+ N1.getOperand(1));
+
+ // fold (A+(B-(A+C))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(0))
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+
+ // fold (A+(B-(C+A))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(1))
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(0));
+
+ // fold (A+((B-A)+or-C)) to (B+or-C)
+ if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+ N1.getOperand(0).getOpcode() == ISD::SUB &&
+ N0 == N1.getOperand(0).getOperand(1))
+ return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
+ N1.getOperand(1));
+
+ // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
+ DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
+ }
+
+ // fold (add (umax X, C), -C) --> (usubsat X, C)
+ if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
+ auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
+ return (!Max && !Op) ||
+ (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
+ };
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
+ /*AllowUndefs*/ true))
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
+ N0.getOperand(1));
+ }
+
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (isOneOrOneSplat(N1)) {
+ // fold (add (xor a, -1), 1) -> (sub 0, a)
+ if (isBitwiseNot(N0))
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+
+ // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
+ if (N0.getOpcode() == ISD::ADD ||
+ N0.getOpcode() == ISD::UADDO ||
+ N0.getOpcode() == ISD::SADDO) {
+ SDValue A, Xor;
+
+ if (isBitwiseNot(N0.getOperand(0))) {
+ A = N0.getOperand(1);
+ Xor = N0.getOperand(0);
+ } else if (isBitwiseNot(N0.getOperand(1))) {
+ A = N0.getOperand(0);
+ Xor = N0.getOperand(1);
+ }
+
+ if (Xor)
+ return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
+ }
+
+ // Look for:
+ // add (add x, y), 1
+ // And if the target does not like this form then turn into:
+ // sub y, (xor x, -1)
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::ADD) {
+ SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
+ }
+ }
+
+ // (x - y) + -1 -> add (xor y, -1), x
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isAllOnesOrAllOnesSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
+ if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
+ return Combined;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDSAT(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ // TODO SimplifyVBinOp
+
+ // fold (add_sat x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ }
+
+ // fold (add_sat x, undef) -> -1
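+  // (undef may take any value; a value can always be chosen so the saturating
+  // add produces all-ones, so folding to -1 is a valid refinement)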
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getAllOnesConstant(DL, VT);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ // canonicalize constant to RHS
+ if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, VT, N1, N0);
+ // fold (add_sat c1, c2) -> c3
+ return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
+ N1.getNode());
+ }
+
+ // fold (add_sat x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ // If it cannot overflow, transform into an add.
+ if (Opcode == ISD::UADDSAT)
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+
+ return SDValue();
+}
+
+static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
+ bool Masked = false;
+
+ // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
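+  // e.g. (a sketch): legalization may wrap the carry result as
+  // (and (trunc (uaddo X, Y):1), 1); look through those to the carry itself.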
+ while (true) {
+ if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
+ V = V.getOperand(0);
+ continue;
+ }
+
+ if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
+ Masked = true;
+ V = V.getOperand(0);
+ continue;
+ }
+
+ break;
+ }
+
+ // If this is not a carry, return.
+ if (V.getResNo() != 1)
+ return SDValue();
+
+ if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
+ V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
+ return SDValue();
+
+ EVT VT = V.getNode()->getValueType(0);
+ if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
+ return SDValue();
+
+  // If the result is masked, then it is safe to return it whatever boolean
+  // representation is in use. If it isn't, then we need to make sure the bool
+  // is represented as either 0 or 1 and cannot take other values.
+ if (Masked ||
+ TLI.getBooleanContents(V.getValueType()) ==
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ return V;
+
+ return SDValue();
+}
+
+/// Given the operands of an add/sub operation, see if the 2nd operand is a
+/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
+/// the opcode and bypass the mask operation.
+static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
+ SelectionDAG &DAG, const SDLoc &DL) {
+ if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N0.getValueType();
+ if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
+ return SDValue();
+
+ // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
+ // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
+}
+
+/// Helper for doing combines based on N0 and N1 being added to each other.
+SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
+ EVT VT = N0.getValueType();
+ SDLoc DL(LocReference);
+
+ // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+ if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N0,
+ DAG.getNode(ISD::SHL, DL, VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+
+ if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
+ return V;
+
+ // Look for:
+ // add (add x, 1), y
+ // And if the target does not like this form then turn into:
+ // sub y, (xor x, -1)
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
+ SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
+ }
+
+ // Hoist one-use subtraction by non-opaque constant:
+ // (x - C) + y -> (x + y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+ }
+ // Hoist one-use subtraction from non-opaque constant:
+ // (C - x) + y -> (y - x) + C
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+ }
+
+ // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
+ // rather than 'add 0/-1' (the zext should get folded).
+ // add (sext i1 Y), X --> sub X, (zext i1 Y)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
+ // add X, (sextinreg Y i1) -> sub X, (and Y 1)
+ if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+ if (TN->getVT() == MVT::i1) {
+ SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+ DAG.getConstant(1, DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
+ }
+ }
+
+ // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
+ if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
+ N1.getResNo() == 0)
+ return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
+ N0, N1.getOperand(0), N1.getOperand(2));
+
+ // (add X, Carry) -> (addcarry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+ if (SDValue Carry = getAsCarry(TLI, N1))
+ return DAG.getNode(ISD::ADDCARRY, DL,
+ DAG.getVTList(VT, Carry.getValueType()), N0,
+ DAG.getConstant(0, DL, VT), Carry);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
+
+ // fold (addc x, 0) -> x + no carry out
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+ DL, MVT::Glue));
+
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ return SDValue();
+}
+
+static SDValue flipBoolean(SDValue V, const SDLoc &DL,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ EVT VT = V.getValueType();
+
+ SDValue Cst;
+ switch (TLI.getBooleanContents(VT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ Cst = DAG.getConstant(1, DL, VT);
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ Cst = DAG.getAllOnesConstant(DL, VT);
+ break;
+ }
+
+ return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
+}
+
+/**
+ * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
+ * then the flip also occurs if computing the inverse is the same cost.
+ * This function returns an empty SDValue in case it cannot flip the boolean
+ * without increasing the cost of the computation. If you want to flip a boolean
+ * no matter what, use flipBoolean.
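+ *
+ * For example (a sketch): under ZeroOrOneBooleanContent, V = (xor W, 1) is a
+ * free flip, so W is returned; a bare constant is flipped only when Force is
+ * set.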
+ */
+static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool Force) {
+ if (Force && isa<ConstantSDNode>(V))
+ return flipBoolean(V, SDLoc(V), DAG, TLI);
+
+ if (V.getOpcode() != ISD::XOR)
+ return SDValue();
+
+ ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
+ if (!Const)
+ return SDValue();
+
+ EVT VT = V.getValueType();
+
+ bool IsFlip = false;
+ switch(TLI.getBooleanContents(VT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ IsFlip = Const->isOne();
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ IsFlip = Const->isAllOnesValue();
+ break;
+ case TargetLowering::UndefinedBooleanContent:
+ IsFlip = (Const->getAPIntValue() & 0x01) == 1;
+ break;
+ }
+
+ if (IsFlip)
+ return V.getOperand(0);
+ if (Force)
+ return flipBoolean(V, SDLoc(V), DAG, TLI);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ bool IsSigned = (ISD::SADDO == N->getOpcode());
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
+
+ // fold (addo x, 0) -> x + no carry out
+ if (isNullOrNullSplat(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ if (!IsSigned) {
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+
+ // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
+ SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
+ DAG.getConstant(0, DL, VT), N0.getOperand(0));
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ }
+
+ if (SDValue Combined = visitUADDOLike(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitUADDOLike(N1, N0, N))
+ return Combined;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
+ // If Y + 1 cannot overflow.
+ if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
+ SDValue Y = N1.getOperand(0);
+ SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
+ if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
+ N1.getOperand(2));
+ }
+
+ // (uaddo X, Carry) -> (addcarry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+ if (SDValue Carry = getAsCarry(TLI, N1))
+ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
+ DAG.getConstant(0, SDLoc(N), VT), Carry);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
+ N1, N0, CarryIn);
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+ // fold (addcarry x, y, false) -> (uaddo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
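+  // (the masking with 1 normalizes the incoming boolean, which may be
+  // represented as 0/-1 on some targets, to the arithmetic value 0 or 1)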
+ if (isNullConstant(N0) && isNullConstant(N1)) {
+ EVT VT = N0.getValueType();
+ EVT CarryVT = CarryIn.getValueType();
+ SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
+ AddToWorklist(CarryExt.getNode());
+ return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
+ DAG.getConstant(1, DL, VT)),
+ DAG.getConstant(0, DL, CarryVT));
+ }
+
+ if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
+ return Combined;
+
+ return SDValue();
+}
+
+/**
+ * If we are facing some sort of diamond carry propagation pattern, try to
+ * break it up to generate something like:
+ * (addcarry X, 0, (addcarry A, B, Z):Carry)
+ *
+ * The end result is usually an increase in the number of operations required,
+ * but because the carry is now linearized, other transforms can kick in and
+ * optimize the DAG.
+ *
+ * Patterns typically look something like
+ * (uaddo A, B)
+ * / \
+ * Carry Sum
+ * | \
+ * | (addcarry *, 0, Z)
+ * | /
+ * \ Carry
+ * | /
+ * (addcarry X, *, *)
+ *
+ * But numerous variations exist. Our goal is to identify A, B, X and Z and
+ * produce a combine with a single path for carry propagation.
+ */
+static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
+ SDValue X, SDValue Carry0, SDValue Carry1,
+ SDNode *N) {
+ if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
+ return SDValue();
+ if (Carry1.getOpcode() != ISD::UADDO)
+ return SDValue();
+
+ SDValue Z;
+
+ /**
+ * First look for a suitable Z. It will present itself in the form of
+ * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
+ */
+ if (Carry0.getOpcode() == ISD::ADDCARRY &&
+ isNullConstant(Carry0.getOperand(1))) {
+ Z = Carry0.getOperand(2);
+ } else if (Carry0.getOpcode() == ISD::UADDO &&
+ isOneConstant(Carry0.getOperand(1))) {
+ EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
+ Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
+ } else {
+ // We couldn't find a suitable Z.
+ return SDValue();
+ }
+
+  auto cancelDiamond = [&](SDValue A, SDValue B) {
+ SDLoc DL(N);
+ SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
+ Combiner.AddToWorklist(NewY.getNode());
+ return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
+ DAG.getConstant(0, DL, X.getValueType()),
+ NewY.getValue(1));
+ };
+
+ /**
+ * (uaddo A, B)
+ * |
+ * Sum
+ * |
+ * (addcarry *, 0, Z)
+ */
+ if (Carry0.getOperand(0) == Carry1.getValue(0)) {
+ return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
+ }
+
+ /**
+ * (addcarry A, 0, Z)
+ * |
+ * Sum
+ * |
+ * (uaddo *, B)
+ */
+ if (Carry1.getOperand(0) == Carry0.getValue(0)) {
+ return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
+ }
+
+ if (Carry1.getOperand(1) == Carry0.getValue(0)) {
+ return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
+ SDNode *N) {
+ // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
+ if (isBitwiseNot(N0))
+ if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
+ SDLoc DL(N);
+ SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
+ N0.getOperand(0), NotC);
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ }
+
+ // Iff the flag result is dead:
+ // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
+ // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
+ // or the dependency between the instructions.
+ if ((N0.getOpcode() == ISD::ADD ||
+ (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
+ N0.getValue(1) != CarryIn)) &&
+ isNullConstant(N1) && !N->hasAnyUseOfValue(1))
+ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
+ N0.getOperand(0), N0.getOperand(1), CarryIn);
+
+  /**
+   * When one of the addcarry arguments is itself a carry, we may be facing
+   * a diamond carry propagation pattern, in which case we try to transform
+   * the DAG to ensure linear carry propagation if that is possible.
+   */
+ if (auto Y = getAsCarry(TLI, N1)) {
+ // Because both are carries, Y and Z can be swapped.
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
+ return R;
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
+ return R;
+ }
+
+ return SDValue();
+}
+
+// Since it may not be valid to emit a fold to zero for vector initializers,
+// check whether we can before folding.
+static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations) {
+ if (!VT.isVector())
+ return DAG.getConstant(0, DL, VT);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return DAG.getConstant(0, DL, VT);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (sub x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
+ if (N0 == N1)
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // fold (sub c1, c2) -> c1-c2
+ return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
+ N1.getNode());
+ }
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C) {
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
+
+ if (isNullOrNullSplat(N0)) {
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ // Right-shifting everything out but the sign bit followed by negation is
+ // the same as flipping arithmetic/logical shift type without the negation:
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
+ ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
+ if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
+ auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
+ if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
+ return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
+ }
+ }
+
+ // 0 - X --> 0 if the sub is NUW.
+ if (N->getFlags().hasNoUnsignedWrap())
+ return N0;
+
+ if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
+ // N1 is either 0 or the minimum signed value. If the sub is NSW, then
+ // N1 must be 0 because negating the minimum signed value is undefined.
+ if (N->getFlags().hasNoSignedWrap())
+ return N0;
+
+ // 0 - X --> X if X is 0 or the minimum signed value.
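+      // (in two's complement, negating 0 or the minimum signed value yields
+      // the same value back)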
+ return N1;
+ }
+ }
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+ if (isAllOnesOrAllOnesSplat(N0))
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+
+ // fold (A - (0-B)) -> A+B
+ if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
+
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
+
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+
+ // fold (A+C1)-C2 -> A+(C1-C2)
+ if (N0.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
+ }
+
+ // fold C2-(A+C1) -> (C2-C1)-A
+ if (N1.getOpcode() == ISD::ADD) {
+ SDValue N11 = N1.getOperand(1);
+ if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
+ N11.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
+ }
+ }
+
+ // fold (A-C1)-C2 -> A-(C1+C2)
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
+ }
+
+ // fold (c1-A)-c2 -> (c1-c2)-A
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
+ }
+
+ // fold ((A+(B+or-C))-B) -> A+or-C
+ if (N0.getOpcode() == ISD::ADD &&
+ (N0.getOperand(1).getOpcode() == ISD::SUB ||
+ N0.getOperand(1).getOpcode() == ISD::ADD) &&
+ N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(1));
+
+ // fold ((A+(C+B))-B) -> A+C
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(0));
+
+ // fold ((A-(B-C))-C) -> A-B
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(0));
+
+ // fold (A-(B-C)) -> A+(C-B)
+ if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
+ N1.getOperand(0)));
+
+ // fold (X - (-Y * Z)) -> (X + (Y * Z))
+ if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
+ if (N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
+ }
+ if (N1.getOperand(1).getOpcode() == ISD::SUB &&
+ isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+ N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
+ }
+ }
+
+ // If either operand of a sub is undef, the result is undef
+ if (N0.isUndef())
+ return N0;
+ if (N1.isUndef())
+ return N1;
+
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
+ return V;
+
+ // (x - y) - 1 -> add (xor y, -1), x
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
+ // Look for:
+ // sub y, (xor x, -1)
+ // And if the target does not like this form then turn into:
+ // add (add x, y), 1
+ if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
+ }
+
+ // Hoist one-use addition by non-opaque constant:
+ // (x + C) - y -> (x - y) + C
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
+ }
+ // y - (x + C) -> (y - x) - C
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
+ }
+ // (x - C) - y -> (x - y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
+ }
+ // (C - x) - y -> C - (x + y)
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
+ }
+
+ // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
+ // rather than 'sub 0/1' (the sext should get folded).
+ // sub X, (zext i1 Y) --> add X, (sext i1 Y)
+ if (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ TLI.getBooleanContents(VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
+ }
+
+ // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
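+  // (this is the classic branchless abs idiom: Y is all-ones exactly when X
+  // is negative, so (X ^ Y) - Y == -X for negative X and X otherwise)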
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
+ SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
+ SDValue S0 = N1.getOperand(0);
+ if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+ }
+ }
+ }
+
+ // If the relocation model supports it, consider symbol offsets.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // fold (sub Sym, c) -> Sym-c
+ if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
+ GA->getOffset() -
+ (uint64_t)N1C->getSExtValue());
+ // fold (sub Sym+c1, Sym+c2) -> c1-c2
+ if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+ if (GA->getGlobal() == GB->getGlobal())
+ return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+ DL, VT);
+ }
+
+ // sub X, (sextinreg Y i1) -> add X, (and Y 1)
+ if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+ if (TN->getVT() == MVT::i1) {
+ SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+ DAG.getConstant(1, DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
+ }
+ }
+
+ // Prefer an add for more folding potential and possibly better codegen:
+ // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
+ if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
+ SDValue ShAmt = N1.getOperand(1);
+ ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+ if (ShAmtC &&
+ ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
+ return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
+ }
+ }
+
+ if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
+ // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
+ if (SDValue Carry = getAsCarry(TLI, N0)) {
+ SDValue X = N1;
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
+ return DAG.getNode(ISD::ADDCARRY, DL,
+ DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
+ Carry);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ // TODO SimplifyVBinOp
+
+ // fold (sub_sat x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (sub_sat x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (sub_sat x, x) -> 0
+ if (N0 == N1)
+ return DAG.getConstant(0, DL, VT);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // fold (sub_sat c1, c2) -> c3
+ return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
+ N1.getNode());
+ }
+
+ // fold (sub_sat x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // fold (subc x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // fold (subc x, 0) -> x + no borrow
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (isAllOnesConstant(N0))
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ bool IsSigned = (ISD::SSUBO == N->getOpcode());
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // fold (subo x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
+  // fold (subo x, c) -> (addo x, -c)
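+  // Note: negating INT_MIN wraps back to INT_MIN, so e.g. an i8
+  // (ssubo x, -128) must not become (saddo x, -128); hence the
+  // isMinSignedValue() guard.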
+ if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
+
+ // fold (subo x, 0) -> x + no borrow
+ if (isNullOrNullSplat(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getConstant(0, DL, CarryVT));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (sube x, y, false) -> (subc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (subcarry x, y, false) -> (usubo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
+ return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
+// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
+// UMULFIXSAT here.
+SDValue DAGCombiner::visitMULFIX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Scale = N->getOperand(2);
+ EVT VT = N0.getValueType();
+
+ // fold (mulfix x, undef, scale) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // Canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
+
+ // fold (mulfix x, 0, scale) -> 0
+ if (isNullConstant(N1))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold (mul x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ bool N0IsConst = false;
+ bool N1IsConst = false;
+ bool N1IsOpaqueConst = false;
+ bool N0IsOpaqueConst = false;
+ APInt ConstValue0, ConstValue1;
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
+ N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
+ assert((!N0IsConst ||
+ ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
+ "Splat APInt should be element width");
+ assert((!N1IsConst ||
+ ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
+ "Splat APInt should be element width");
+ } else {
+ N0IsConst = isa<ConstantSDNode>(N0);
+ if (N0IsConst) {
+ ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
+ N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
+ }
+ N1IsConst = isa<ConstantSDNode>(N1);
+ if (N1IsConst) {
+ ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
+ N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
+ }
+ }
+
+ // fold (mul c1, c2) -> c1*c2
+ if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
+ return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
+ N0.getNode(), N1.getNode());
+
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
+ // fold (mul x, 0) -> 0
+ if (N1IsConst && ConstValue1.isNullValue())
+ return N1;
+ // fold (mul x, 1) -> x
+ if (N1IsConst && ConstValue1.isOneValue())
+ return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // fold (mul x, -1) -> 0-x
+ if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+ }
+ // fold (mul x, (1 << c)) -> x << c
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1) &&
+ (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
+ SDLoc DL(N);
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ }
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
+ unsigned Log2Val = (-ConstValue1).logBase2();
+ SDLoc DL(N);
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(Log2Val, DL,
+ getShiftAmountTy(N0.getValueType()))));
+ }
+
+ // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
+ // mul x, (2^N + 1) --> add (shl x, N), x
+ // mul x, (2^N - 1) --> sub (shl x, N), x
+ // Examples: x * 33 --> (x << 5) + x
+ // x * 15 --> (x << 4) - x
+ // x * -33 --> -((x << 5) + x)
+ // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
+ if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
+ // TODO: We could handle more general decomposition of any constant by
+ // having the target set a limit on number of ops and making a
+ // callback to determine that sequence (similar to sqrt expansion).
+ unsigned MathOp = ISD::DELETED_NODE;
+ APInt MulC = ConstValue1.abs();
+ if ((MulC - 1).isPowerOf2())
+ MathOp = ISD::ADD;
+ else if ((MulC + 1).isPowerOf2())
+ MathOp = ISD::SUB;
+
+ if (MathOp != ISD::DELETED_NODE) {
+ unsigned ShAmt =
+ MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
+ assert(ShAmt < VT.getScalarSizeInBits() &&
+ "multiply-by-constant generated out of bounds shift");
+ SDLoc DL(N);
+ SDValue Shl =
+ DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
+ SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
+ if (ConstValue1.isNegative())
+ R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
+ return R;
+ }
+ }
+
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+ if (N0.getOpcode() == ISD::SHL &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+ SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
+ if (isConstantOrConstantVector(C3))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDValue Sh(nullptr, 0), Y(nullptr, 0);
+
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL &&
+ isConstantOrConstantVector(N0.getOperand(1)) &&
+ N0.getNode()->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isConstantOrConstantVector(N1.getOperand(1)) &&
+ N1.getNode()->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+
+ if (Sh.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
+ }
+ }
+
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
+ N0.getOpcode() == ISD::ADD &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+ isMulAddWithConstProfitable(N, N0, N1))
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT,
+ DAG.getNode(ISD::MUL, SDLoc(N0), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT,
+ N0.getOperand(1), N1));
+
+ // reassociate mul
+ if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
+ return RMUL;
+
+ return SDValue();
+}
+
+/// Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ EVT NodeType = Node->getValueType(0);
+ if (!NodeType.isSimple())
+ return false;
+ switch (NodeType.getSimpleVT().SimpleTy) {
+ default: return false; // No libcall for vector types.
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != nullptr;
+}
+
+/// Issue divrem if both quotient and remainder are needed.
+SDValue DAGCombiner::useDivRem(SDNode *Node) {
+ if (Node->use_empty())
+ return SDValue(); // This is a dead node, leave it alone.
+
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+
+  // DivMod libcalls can still work on types that are not legal.
+ EVT VT = Node->getValueType(0);
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
+ return SDValue();
+
+ // If DIVREM is going to get expanded into a libcall,
+ // but there is no libcall available, then don't combine.
+ if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
+ !isDivRemLibcallAvailable(Node, isSigned, TLI))
+ return SDValue();
+
+ // If div is legal, it's better to do the normal expansion
+ unsigned OtherOpcode = 0;
+ if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
+ OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
+ if (TLI.isOperationLegalOrCustom(Opcode, VT))
+ return SDValue();
+ } else {
+ OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
+ return SDValue();
+ }
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue combined;
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
+ User->use_empty())
+ continue;
+ // Convert the other matching node(s), too;
+ // otherwise, the DIVREM may get target-legalized into something
+ // target-specific that we won't be able to recognize.
+ unsigned UserOpc = User->getOpcode();
+ if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1) {
+ if (!combined) {
+ if (UserOpc == OtherOpcode) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
+ } else if (UserOpc == DivRemOpc) {
+ combined = SDValue(User, 0);
+ } else {
+ assert(UserOpc == Opcode);
+ continue;
+ }
+ }
+ if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
+ CombineTo(User, combined);
+ else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
+ CombineTo(User, combined.getValue(1));
+ }
+ }
+ return combined;
+}
+
+static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ unsigned Opc = N->getOpcode();
+ bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // X / undef -> undef
+ // X % undef -> undef
+ // X / 0 -> undef
+ // X % 0 -> undef
+ // NOTE: This includes vectors where any divisor element is zero/undef.
+ if (DAG.isUndef(Opc, {N0, N1}))
+ return DAG.getUNDEF(VT);
+
+ // undef / X -> 0
+ // undef % X -> 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // 0 / X -> 0
+ // 0 % X -> 0
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ if (N0C && N0C->isNullValue())
+ return N0;
+
+ // X / X -> 1
+ // X % X -> 0
+ if (N0 == N1)
+ return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
+
+ // X / 1 -> X
+ // X % 1 -> 0
+ // If this is a boolean op (single-bit element type), we can't have
+ // division-by-zero or remainder-by-zero, so assume the divisor is 1.
+ // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
+ // it's a 1.
+ if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
+ return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ SDLoc DL(N);
+
+ // fold (sdiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+ // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
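+  // Only X == MIN_SIGNED has a magnitude of at least |MIN_SIGNED|, so the
+  // quotient is 1 for that value and truncates to 0 for every other X.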
+ if (N1C && N1C->getAPIntValue().isMinSignedValue())
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT));
+
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // If we know the sign bits of both operands are zero, strength reduce to a
+  // udiv instead. Handles (X&15) /s 4 -> (X&15) >> 2
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
+
+ if (SDValue V = visitSDIVLike(N0, N1, N)) {
+    // If the corresponding remainder node exists, update its users with
+    // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
+ { N0, N1 })) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
+ return V;
+ }
+
+ // sdiv, srem -> sdivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+  // Helper for determining whether a value is a power-of-2 constant scalar,
+  // the negation of one, or a vector of such elements.
+ auto IsPowerOfTwo = [](ConstantSDNode *C) {
+ if (C->isNullValue() || C->isOpaque())
+ return false;
+ if (C->getAPIntValue().isPowerOf2())
+ return true;
+ if ((-C->getAPIntValue()).isPowerOf2())
+ return true;
+ return false;
+ };
+
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ // FIXME: We check for the exact bit here because the generic lowering gives
+ // better results in that case. The target-specific lowering should learn how
+ // to handle exact sdivs efficiently.
+ if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
+ // Target-specific implementation of sdiv x, pow2.
+ if (SDValue Res = BuildSDIVPow2(N))
+ return Res;
+
+ // Create constants that are functions of the shift amount value.
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+ SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
+ SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
+ C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
+ SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
+ if (!isConstantOrConstantVector(Inexact))
+ return SDValue();
+
+ // Splat the sign bit into the register
+ SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
+ DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
+ AddToWorklist(Sign.getNode());
+
+    // Add (N0 < 0) ? |divisor| - 1 : 0 to bias the dividend before shifting.
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
+ AddToWorklist(Srl.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
+ AddToWorklist(Add.getNode());
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
+ AddToWorklist(Sra.getNode());
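+    // Worked example for i32 sdiv X, 8 (C1 = 3, Inexact = 29) with X = -9:
+    // Sign = -1, Srl = 7, Add = -2, Sra = -2 >>s 3 = -1, matching -9 / 8
+    // truncated toward zero.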
+
+    // Special case: (sdiv X, 1) -> X
+    // Special case: (sdiv X, -1) -> 0-X
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+ SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
+ SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
+ SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
+ Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
+
+ // If dividing by a positive value, we're done. Otherwise, the result must
+ // be negated.
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
+
+ // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
+ SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
+ SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
+ return Res;
+ }
+
+ // If integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence. Targets may check function attributes for size/speed
+ // trade-offs.
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildSDIV(N))
+ return Op;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ SDLoc DL(N);
+
+ // fold (udiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
+ N0C, N1C))
+ return Folded;
+ // fold (udiv X, -1) -> select(X == -1, 1, 0)
+ if (N1C && N1C->getAPIntValue().isAllOnesValue())
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT));
+
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ if (SDValue V = visitUDIVLike(N0, N1, N)) {
+    // If the corresponding remainder node exists, update its users with
+    // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
+ { N0, N1 })) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
+ return V;
+ }
+
+  // udiv, urem -> udivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1)) {
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
+
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+ if (N1.getOpcode() == ISD::SHL) {
+ SDValue N10 = N1.getOperand(0);
+ if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N10)) {
+ SDValue LogBase2 = BuildLogBase2(N10, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
+ AddToWorklist(Trunc.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
+ }
+ }
+
+ // fold (udiv x, c) -> alternate
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildUDIV(N))
+ return Op;
+
+ return SDValue();
+}
+
+// handles ISD::SREM and ISD::UREM
+SDValue DAGCombiner::visitREM(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+
+ bool isSigned = (Opcode == ISD::SREM);
+ SDLoc DL(N);
+
+ // fold (rem c1, c2) -> c1%c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
+ return Folded;
+  // fold (urem X, -1) -> select(X == -1, 0, X)
+ if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(0, DL, VT), N0);
+
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ if (isSigned) {
+ // If we know the sign bits of both operands are zero, strength reduce to a
+    // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X & 15
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
+ } else {
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
+ if (DAG.isKnownToBeAPowerOfTwo(N1)) {
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ }
+ if (N1.getOpcode() == ISD::SHL &&
+ DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ }
+ }
+
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
+ // speculative DIV must not cause a DIVREM conversion. We guard against this
+ // by skipping the simplification if isIntDivCheap(). When div is not cheap,
+ // combine will not return a DIVREM. Regardless, checking cheapness here
+ // makes sense since the simplification results in fatter code.
+ if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
+ SDValue OptimizedDiv =
+ isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
+ if (OptimizedDiv.getNode()) {
+ // If the equivalent Div node also exists, update its users.
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
+ { N0, N1 }))
+ CombineTo(DivNode, OptimizedDiv);
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(OptimizedDiv.getNode());
+ AddToWorklist(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // sdiv, srem -> sdivrem
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem.getValue(1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (VT.isVector()) {
+ // fold (mulhs x, 0) -> 0
+    // Do not return N0/N1, because either may contain undef elements.
+ if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+ ISD::isBuildVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
+ }
+
+ // fold (mulhs x, 0) -> 0
+ if (isNullConstant(N1))
+ return N1;
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
+ if (isOneConstant(N1))
+ return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
+ DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
+ getShiftAmountTy(N0.getValueType())));
+
+ // fold (mulhs x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
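+  // e.g. for i16 with a legal i32 multiply:
+  //   mulhs x, y --> trunc i16 ((sext i32 x * sext i32 y) >>u 16)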
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (VT.isVector()) {
+ // fold (mulhu x, 0) -> 0
+    // Do not return N0/N1, because either may contain undef elements.
+ if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+ ISD::isBuildVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
+ }
+
+ // fold (mulhu x, 0) -> 0
+ if (isNullConstant(N1))
+ return N1;
+ // fold (mulhu x, 1) -> 0
+ if (isOneConstant(N1))
+ return DAG.getConstant(0, DL, N0.getValueType());
+ // fold (mulhu x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
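+  // e.g. i32: mulhu x, 16 is the high 32 bits of (zext x) * 16, i.e. x >>u 28.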
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
+ unsigned NumEltBits = VT.getScalarSizeInBits();
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ SDValue SRLAmt = DAG.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
+
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
+/// give the opcodes for the two computations that are being performed. Return
+/// the simplified value if a simplification was made, or a null SDValue
+/// otherwise.
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp) {
+ // If the high half is not needed, just compute the low half.
+ bool HiExists = N->hasAnyUseOfValue(1);
+ if (!HiExists && (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
+ SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If the low half is not needed, just compute the high half.
+ bool LoExists = N->hasAnyUseOfValue(0);
+ if (!LoExists && (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
+ SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If both halves are used, return as it is.
+ if (LoExists && HiExists)
+ return SDValue();
+
+ // If the two computed results can be simplified separately, separate them.
+ if (LoExists) {
+ SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
+ AddToWorklist(Lo.getNode());
+ SDValue LoOpt = combine(Lo.getNode());
+ if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
+ return CombineTo(N, LoOpt, LoOpt);
+ }
+
+ if (HiExists) {
+ SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
+ AddToWorklist(Hi.getNode());
+ SDValue HiOpt = combine(Hi.getNode());
+ if (HiOpt.getNode() && HiOpt != Hi &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
+ return CombineTo(N, HiOpt, HiOpt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
+ return Res;
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+  // If the twice-as-wide type is legal, transform the multiply to a wider
+  // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+      // Compute the high result (value 1) from the top half of the product.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+      // Compute the low result (value 0) by truncating the product.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
+ return Res;
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // (umul_lohi N0, 0) -> (0, 0)
+ if (isNullConstant(N->getOperand(1))) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return CombineTo(N, Zero, Zero);
+ }
+
+ // (umul_lohi N0, 1) -> (N0, 0)
+ if (isOneConstant(N->getOperand(1))) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return CombineTo(N, N->getOperand(0), Zero);
+ }
+
+  // If the twice-as-wide type is legal, transform the multiply to a wider
+  // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+      // Compute the high result (value 1) from the top half of the product.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+      // Compute the low result (value 0) by truncating the product.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ bool IsSigned = (ISD::SMULO == N->getOpcode());
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
+
+ // fold (mulo x, 0) -> 0 + no carry out
+ if (isNullOrNullSplat(N1))
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
+ // (mulo x, 2) -> (addo x, x)
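+  // x * 2 and x + x are identical values and overflow in exactly the same
+  // cases, for both the signed and unsigned flavors.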
+ if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
+ N->getVTList(), N0, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold operation with constant operands.
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+
+  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
+ // Only do this if the current op isn't legal and the flipped is.
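+  // e.g. when both operands are known non-negative, smax(x, y) == umax(x, y).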
+ unsigned Opcode = N->getOpcode();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegal(Opcode, VT) &&
+ (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
+ (N1.isUndef() || DAG.SignBitIsZero(N1))) {
+ unsigned AltOpcode;
+ switch (Opcode) {
+ case ISD::SMIN: AltOpcode = ISD::UMIN; break;
+ case ISD::SMAX: AltOpcode = ISD::UMAX; break;
+ case ISD::UMIN: AltOpcode = ISD::SMIN; break;
+ case ISD::UMAX: AltOpcode = ISD::SMAX; break;
+ default: llvm_unreachable("Unknown MINMAX opcode");
+ }
+ if (TLI.isOperationLegal(AltOpcode, VT))
+ return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+ }
+
+ return SDValue();
+}
+
+/// If this is a bitwise logic instruction and both operands have the same
+/// opcode, try to sink the other opcode after the logic instruction.
+SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ unsigned LogicOpcode = N->getOpcode();
+ unsigned HandOpcode = N0.getOpcode();
+ assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+ LogicOpcode == ISD::XOR) && "Expected logic opcode");
+ assert(HandOpcode == N1.getOpcode() && "Bad input!");
+
+ // Bail early if none of these transforms apply.
+ if (N0.getNumOperands() == 0)
+ return SDValue();
+
+ // FIXME: We should check number of uses of the operands to not increase
+ // the instruction count for all transforms.
+
+ // Handle size-changing casts.
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N1.getOperand(0);
+ EVT XVT = X.getValueType();
+ SDLoc DL(N);
+ if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
+ HandOpcode == ISD::SIGN_EXTEND) {
+ // If both operands have other uses, this transform would create extra
+ // instructions without eliminating anything.
+ if (!N0.hasOneUse() && !N1.hasOneUse())
+ return SDValue();
+ // We need matching integer source types.
+ if (XVT != Y.getValueType())
+ return SDValue();
+ // Don't create an illegal op during or after legalization. Don't ever
+ // create an unsupported vector op.
+ if ((VT.isVector() || LegalOperations) &&
+ !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
+ return SDValue();
+ // Avoid infinite looping with PromoteIntBinOp.
+ // TODO: Should we apply desirable/legal constraints to all opcodes?
+ if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
+ !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
+ return SDValue();
+ // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
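+    // e.g. (and (zext i8 X to i32), (zext i8 Y to i32))
+    //        --> (zext (and i8 X, Y) to i32)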
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
+ if (HandOpcode == ISD::TRUNCATE) {
+ // If both operands have other uses, this transform would create extra
+ // instructions without eliminating anything.
+ if (!N0.hasOneUse() && !N1.hasOneUse())
+ return SDValue();
+ // We need matching source types.
+ if (XVT != Y.getValueType())
+ return SDValue();
+ // Don't create an illegal op during or after legalization.
+ if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
+ return SDValue();
+ // Be extra careful sinking truncate. If it's free, there's no benefit in
+ // widening a binop. Also, don't create a logic op on an illegal type.
+ if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
+ return SDValue();
+ if (!TLI.isTypeLegal(XVT))
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // For binops SHL/SRL/SRA/AND:
+ // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
+ if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
+ HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
+ }
+
+ // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
+ if (HandOpcode == ISD::BSWAP) {
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization up until type legalization, before
+  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
+ Level <= AfterLegalizeTypes) {
+ // Input types must be integer and the same.
+ if (XVT.isInteger() && XVT == Y.getValueType() &&
+ !(VT.isVector() && TLI.isTypeLegal(VT) &&
+ !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
+ // There are other cases where moving the shuffle after the xor/and/or
+ // is profitable even if shuffles don't perform a swizzle.
+ // If both shuffles use the same mask, and both shuffles have the same first
+ // or second operand, then it might still be profitable to move the shuffle
+ // after the xor/and/or operation.
+ if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
+ auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
+ assert(X.getValueType() == Y.getValueType() &&
+ "Inputs to shuffles are not the same type");
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ // Check also that shuffles have only one use to avoid introducing extra
+ // instructions.
+ if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
+ !SVN0->getMask().equals(SVN1->getMask()))
+ return SDValue();
+
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ SDValue ShOp = N0.getOperand(1);
+ if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
+ ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
+ if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
+ }
+
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ ShOp = N0.getOperand(0);
+ if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
+ ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
+ if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
+ N1.getOperand(1));
+ return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
+ }
+ }
+
+ return SDValue();
+}
+
+/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
+SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL) {
+ SDValue LL, LR, RL, RR, N0CC, N1CC;
+ if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
+ !isSetCCEquivalent(N1, RL, RR, N1CC))
+ return SDValue();
+
+ assert(N0.getValueType() == N1.getValueType() &&
+ "Unexpected operand types for bitwise logic op");
+ assert(LL.getValueType() == LR.getValueType() &&
+ RL.getValueType() == RR.getValueType() &&
+ "Unexpected operand types for setcc");
+
+ // If we're here post-legalization or the logic op type is not i1, the logic
+ // op type must match a setcc result type. Also, all folds require new
+ // operations on the left and right operands, so those types must match.
+ EVT VT = N0.getValueType();
+ EVT OpVT = LL.getValueType();
+ if (LegalOperations || VT.getScalarType() != MVT::i1)
+ if (VT != getSetCCResultType(OpVT))
+ return SDValue();
+ if (OpVT != RL.getValueType())
+ return SDValue();
+
+ ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
+ bool IsInteger = OpVT.isInteger();
+ if (LR == RR && CC0 == CC1 && IsInteger) {
+ bool IsZero = isNullOrNullSplat(LR);
+ bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
+
+ // All bits clear?
+ bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
+ // All sign bits clear?
+ bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
+ // Any bits set?
+ bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
+ // Any sign bits set?
+ bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
+
+ // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
+ // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
+ // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
+ // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
+ if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(Or.getNode());
+ return DAG.getSetCC(DL, VT, Or, LR, CC1);
+ }
+
+ // All bits set?
+ bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
+ // All sign bits set?
+ bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
+ // Any bits clear?
+ bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
+ // Any sign bits clear?
+ bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
+
+ // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
+ // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
+ // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
+ // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
+ if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(And.getNode());
+ return DAG.getSetCC(DL, VT, And, LR, CC1);
+ }
+ }
+
+ // TODO: What is the 'or' equivalent of this fold?
+ // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
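+  // (add X, 1) u< 2 exactly when X + 1 is 0 or 1, i.e. when X is -1 or 0,
+  // so the setuge form is true precisely when X is neither.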
+ if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
+ IsInteger && CC0 == ISD::SETNE &&
+ ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
+ (isAllOnesConstant(LR) && isNullConstant(RR)))) {
+ SDValue One = DAG.getConstant(1, DL, OpVT);
+ SDValue Two = DAG.getConstant(2, DL, OpVT);
+ SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
+ AddToWorklist(Add.getNode());
+ return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
+ }
+
+ // Try more general transforms if the predicates match and the only user of
+ // the compares is the 'and' or 'or'.
+ if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
+ N0.hasOneUse() && N1.hasOneUse()) {
+ // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
+ // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
+ if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
+ SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
+ SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
+ SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, Or, Zero, CC1);
+ }
+
+ // Turn compare of constants whose difference is 1 bit into add+and+setcc.
+ // TODO - support non-uniform vector amounts.
+ if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
+ // Match a shared variable operand and 2 non-opaque constant operands.
+ ConstantSDNode *C0 = isConstOrConstSplat(LR);
+ ConstantSDNode *C1 = isConstOrConstSplat(RR);
+ if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
+ // Canonicalize larger constant as C0.
+ if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
+ std::swap(C0, C1);
+
+ // The difference of the constants must be a single bit.
+ const APInt &C0Val = C0->getAPIntValue();
+ const APInt &C1Val = C1->getAPIntValue();
+ if ((C0Val - C1Val).isPowerOf2()) {
+ // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
+          // setcc (and (add X, -C1), ~(C0 - C1)), 0, ne/eq
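+          // e.g. (X != 0 && X != 8) --> ((X & ~8) != 0), since 8 - 0 is a
+          // single bit.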
+ SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
+ SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, And, Zero, CC0);
+ }
+ }
+ }
+ }
+
+ // Canonicalize equivalent operands to LL == RL.
+ if (LL == RR && LR == RL) {
+ CC1 = ISD::getSetCCSwappedOperands(CC1);
+ std::swap(RL, RR);
+ }
+
+ // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ if (LL == RL && LR == RR) {
+ ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
+ : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
+ if (NewCC != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC, OpVT))))
+ return DAG.getSetCC(DL, VT, LL, LR, NewCC);
+ }
+
+ return SDValue();
+}
+
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an And operation to a single value. This makes them reusable in the context
+/// of visitSELECT(). Rules involving constants are not included as
+/// visitSELECT() already handles those cases.
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
+ EVT VT = N1.getValueType();
+ SDLoc DL(N);
+
+ // fold (and x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
+ return V;
+
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
+ APInt ADDC = ADDI->getAPIntValue();
+ APInt SRLC = SRLI->getAPIntValue();
+ if (ADDC.getMinSignedBits() <= 64 &&
+ SRLC.ult(VT.getSizeInBits()) &&
+ !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLC.getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDLoc DL0(N0);
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, DL0, VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Reduce bit extract of low half of an integer to the narrower type.
+ // (and (srl i64:x, K), KMask) ->
+  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Size = VT.getSizeInBits();
+ const APInt &AndMask = CAnd->getAPIntValue();
+ unsigned ShiftBits = CShift->getZExtValue();
+
+        // Bail out; this node will probably disappear anyway.
+ if (ShiftBits == 0)
+ return SDValue();
+
+ unsigned MaskBits = AndMask.countTrailingOnes();
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
+
+ if (AndMask.isMask() &&
+ // Required bits must not span the two halves of the integer and
+ // must fit in the half size type.
+ (ShiftBits + MaskBits <= Size / 2) &&
+ TLI.isNarrowingProfitable(VT, HalfVT) &&
+ TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
+ TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
+ TLI.isTruncateFree(VT, HalfVT) &&
+ TLI.isZExtFree(HalfVT, VT)) {
+ // The isNarrowingProfitable is to avoid regressions on PPC and
+ // AArch64 which match a few 64-bit bit insert / bit extract patterns
+ // on downstream users of this. Those patterns could probably be
+ // extended to handle extensions mixed in.
+
+        SDLoc SL(N0);
+ assert(MaskBits <= Size);
+
+ // Extracting the highest bit of the low half.
+ EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
+ N0.getOperand(0));
+
+ SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
+ SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
+ SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
+ SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
+ return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT) {
+ if (!AndC->getAPIntValue().isMask())
+ return false;
+
+ unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT LoadedVT = LoadN->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
+ // ZEXTLOAD will match without needing to change the size of the value being
+ // loaded.
+ return true;
+ }
+
+  // Do not change the width of volatile or atomic loads.
+ if (!LoadN->isSimple())
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
+ return false;
+
+ return true;
+}
+
+bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
+ ISD::LoadExtType ExtType, EVT &MemVT,
+ unsigned ShAmt) {
+ if (!LDST)
+ return false;
+ // Only allow byte offsets.
+ if (ShAmt % 8)
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!MemVT.isRound())
+ return false;
+
+  // Don't change the width of volatile or atomic loads.
+ if (!LDST->isSimple())
+ return false;
+
+ // Verify that we are actually reducing a load width here.
+ if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
+ return false;
+
+ // Ensure that this isn't going to produce an unsupported memory access.
+ if (ShAmt &&
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ LDST->getAddressSpace(), ShAmt / 8,
+ LDST->getMemOperand()->getFlags()))
+ return false;
+
+ // It's not possible to generate a constant of extended or untyped type.
+ EVT PtrType = LDST->getBasePtr().getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ return false;
+
+ if (isa<LoadSDNode>(LDST)) {
+ LoadSDNode *Load = cast<LoadSDNode>(LDST);
+    // Don't transform one with multiple uses; this would require adding a
+    // new load.
+ if (!SDValue(Load, 0).hasOneUse())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
+ return false;
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (Load->getNumValues() > 2)
+ return false;
+
+ // If the load that we're shrinking is an extload and we're not just
+ // discarding the extension, we can't simply shrink the load. Bail.
+ // TODO: It would be possible to merge the extensions in some cases.
+ if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
+ Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
+ return false;
+ } else {
+ assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
+ StoreSDNode *Store = cast<StoreSDNode>(LDST);
+ // Can't write outside the original store
+ if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
+ return false;
+ }
+ return true;
+}
+
+bool DAGCombiner::SearchForAndLoads(SDNode *N,
+ SmallVectorImpl<LoadSDNode*> &Loads,
+ SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+ ConstantSDNode *Mask,
+ SDNode *&NodeToMask) {
+ // Recursively search for the operands, looking for loads which can be
+ // narrowed.
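+ // e.g. for (and (or (load i32 a), (zext (load i16 b))), 0xFFFF), both the
+ // i32 load (narrowable to a zextload of i16, target permitting) and the
+ // zext are compatible with the mask, so the AND can be removed.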
+ for (SDValue Op : N->op_values()) {
+ if (Op.getValueType().isVector())
+ return false;
+
+ // Some constants may need fixing up later if they are too large.
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
+ (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
+ NodesWithConsts.insert(N);
+ continue;
+ }
+
+ if (!Op.hasOneUse())
+ return false;
+
+ switch (Op.getOpcode()) {
+ case ISD::LOAD: {
+ auto *Load = cast<LoadSDNode>(Op);
+ EVT ExtVT;
+ if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
+ isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
+
+ // ZEXTLOAD is already small enough.
+ if (Load->getExtensionType() == ISD::ZEXTLOAD &&
+ ExtVT.bitsGE(Load->getMemoryVT()))
+ continue;
+
+ // Use bitsLE (not bitsLT) so equal-sized loads are also converted to zext.
+ if (ExtVT.bitsLE(Load->getMemoryVT()))
+ Loads.push_back(Load);
+
+ continue;
+ }
+ return false;
+ }
+ case ISD::ZERO_EXTEND:
+ case ISD::AssertZext: {
+ unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT VT = Op.getOpcode() == ISD::AssertZext ?
+ cast<VTSDNode>(Op.getOperand(1))->getVT() :
+ Op.getOperand(0).getValueType();
+
+ // We can accept extending nodes if the mask is wider or an equal
+ // width to the original type.
+ if (ExtVT.bitsGE(VT))
+ continue;
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::AND:
+ if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
+ NodeToMask))
+ return false;
+ continue;
+ }
+
+ // Allow one node which will be masked along with any loads found.
+ if (NodeToMask)
+ return false;
+
+ // Also ensure that the node to be masked only produces one data result.
+ NodeToMask = Op.getNode();
+ if (NodeToMask->getNumValues() > 1) {
+ bool HasValue = false;
+ for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
+ MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
+ if (VT != MVT::Glue && VT != MVT::Other) {
+ if (HasValue) {
+ NodeToMask = nullptr;
+ return false;
+ }
+ HasValue = true;
+ }
+ }
+ assert(HasValue && "Node to be masked has no data result?");
+ }
+ }
+ return true;
+}
+
+bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
+ auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Mask)
+ return false;
+
+ if (!Mask->getAPIntValue().isMask())
+ return false;
+
+ // No need to do anything if the AND directly uses a load.
+ if (isa<LoadSDNode>(N->getOperand(0)))
+ return false;
+
+ SmallVector<LoadSDNode*, 8> Loads;
+ SmallPtrSet<SDNode*, 2> NodesWithConsts;
+ SDNode *FixupNode = nullptr;
+ if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
+ if (Loads.size() == 0)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
+ SDValue MaskOp = N->getOperand(1);
+
+ // If it exists, fixup the single node we allow in the tree that needs
+ // masking.
+ if (FixupNode) {
+ LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
+ FixupNode->getValueType(0),
+ SDValue(FixupNode, 0), MaskOp);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
+ if (And.getOpcode() == ISD::AND)
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
+ }
+
+ // Narrow any constants that need it.
+ for (auto *LogicN : NodesWithConsts) {
+ SDValue Op0 = LogicN->getOperand(0);
+ SDValue Op1 = LogicN->getOperand(1);
+
+ if (isa<ConstantSDNode>(Op0))
+ std::swap(Op0, Op1);
+
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
+ Op1, MaskOp);
+
+ DAG.UpdateNodeOperands(LogicN, Op0, And);
+ }
+
+ // Create narrow loads.
+ for (auto *Load : Loads) {
+ LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
+ SDValue(Load, 0), MaskOp);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
+ if (And.getOpcode() == ISD::AND)
+ And = SDValue(
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
+ SDValue NewLoad = ReduceLoadWidth(And.getNode());
+ assert(NewLoad &&
+ "Shouldn't be masking the load if it can't be narrowed");
+ CombineTo(Load, NewLoad, NewLoad.getValue(1));
+ }
+ DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
+ return true;
+ }
+ return false;
+}
+
+// Unfold
+// x & (-1 'logical shift' y)
+// To
+// (x 'opposite logical shift' y) 'logical shift' y
+// if it is better for performance.
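+//
+// e.g. x & (-1 << y) becomes (x >> y) << y, clearing the low y bits without
+// materializing the all-ones mask.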
+SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Do we actually prefer shifts over mask?
+ if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
+ return SDValue();
+
+ // Try to match (-1 '[outer] logical shift' y)
+ unsigned OuterShift;
+ unsigned InnerShift; // The opposite direction to the OuterShift.
+ SDValue Y; // Shift amount.
+ auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
+ if (!M.hasOneUse())
+ return false;
+ OuterShift = M->getOpcode();
+ if (OuterShift == ISD::SHL)
+ InnerShift = ISD::SRL;
+ else if (OuterShift == ISD::SRL)
+ InnerShift = ISD::SHL;
+ else
+ return false;
+ if (!isAllOnesConstant(M->getOperand(0)))
+ return false;
+ Y = M->getOperand(1);
+ return true;
+ };
+
+ SDValue X;
+ if (matchMask(N1))
+ X = N0;
+ else if (matchMask(N0))
+ X = N1;
+ else
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // tmp = x 'opposite logical shift' y
+ SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
+ // ret = tmp 'logical shift' y
+ SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
+
+ return T1;
+}
+
+/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
+/// For a target with a bit test, this is expected to become test + set and save
+/// at least 1 instruction.
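+/// e.g. with C == 3: and (not (srl X, 3)), 1 --> (and X, 8) == 0, with the
+/// setcc result zero-extended back to the original type.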
+static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
+ assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
+
+ // This is probably not worthwhile without a supported type.
+ EVT VT = And->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // Look through an optional extension and find a 'not'.
+ // TODO: Should we favor test+set even without the 'not' op?
+ SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
+ if (Not.getOpcode() == ISD::ANY_EXTEND)
+ Not = Not.getOperand(0);
+ if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
+ return SDValue();
+
+ // Look through an optional truncation. The source operand may not be the same
+ // type as the original 'and', but that is ok because we are masking off
+ // everything but the low bit.
+ SDValue Srl = Not.getOperand(0);
+ if (Srl.getOpcode() == ISD::TRUNCATE)
+ Srl = Srl.getOperand(0);
+
+ // Match a shift-right by constant.
+ if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
+ !isa<ConstantSDNode>(Srl.getOperand(1)))
+ return SDValue();
+
+ // We might have looked through casts that make this transform invalid.
+ // TODO: If the source type is wider than the result type, do the mask and
+ // compare in the source type.
+ const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
+ unsigned VTBitWidth = VT.getSizeInBits();
+ if (ShiftAmt.uge(VTBitWidth))
+ return SDValue();
+
+ // Turn this into a bit-test pattern using mask op + setcc:
+ // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
+ SDLoc DL(And);
+ SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
+ EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Mask = DAG.getConstant(
+ APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
+ return DAG.getZExtOrTrunc(Setcc, DL, VT);
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // x & x --> x
+ if (N0 == N1)
+ return N0;
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (and x, 0) -> 0, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ // do not return N0, because an undef node may exist in N0
+ return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
+ SDLoc(N), N0.getValueType());
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ // do not return N1, because an undef node may exist in N1
+ return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
+ SDLoc(N), N1.getValueType());
+
+ // fold (and x, -1) -> x, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N0;
+ }
+
+ // fold (and c1, c2) -> c1&c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+ // fold (and x, -1) -> x
+ if (isAllOnesConstant(N1))
+ return N0;
+ // if (and x, c) is known to be zero, return 0
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(BitWidth)))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // reassociate and
+ if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
+ return RAND;
+
+ // Try to convert a constant mask AND into a shuffle clear mask.
+ if (VT.isVector())
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;
+
+ // fold (and (or x, C), D) -> D if (C & D) == D
+ auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
+ };
+ if (N0.getOpcode() == ISD::OR &&
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
+ return N1;
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N0Op0 = N0.getOperand(0);
+ APInt Mask = ~N1C->getAPIntValue();
+ Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
+ if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
+ N0.getValueType(), N0Op0);
+
+ // Replace uses of the AND with uses of the Zero extend node.
+ CombineTo(N, Zext);
+
+ // We actually want to replace all uses of the any_extend with the
+ // zero_extend, to avoid duplicating things. This will later cause this
+ // AND to be folded.
+ CombineTo(N0.getNode(), Zext);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // Similarly, fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // The 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
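+ //
+ // e.g. (and (extload x, i16), 0xFFFF) -> (zextload x, i16), target
+ // permitting, after which the AND is folded away.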
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD &&
+ N0.getOperand(0).getResNo() == 0) ||
+ (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
+ LoadSDNode *Load = cast<LoadSDNode>(
+ (N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
+
+ // Get the constant (if applicable) the zero'th operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getNullValue(1);
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ if (IsSplat) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
+ unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
+
+ // If the splat value has been compressed to a bitlength lower
+ // than the size of the vector lane, we need to re-expand it to
+ // the lane size.
+ if (EltBitWidth > SplatBitSize)
+ for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
+ SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
+ SplatValue |= SplatValue.shl(SplatBitSize);
+
+ // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+ // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
+ if ((SplatBitSize % EltBitWidth) == 0) {
+ Constant = APInt::getAllOnesValue(EltBitWidth);
+ for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
+ Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
+ }
+ }
+ }
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+ bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getValueType(0),
+ Load->getMemoryVT());
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default: B = false; break;
+ case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD: B = true; break;
+ }
+
+ if (B && Constant.isAllOnesValue()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+
+ // Fold the AND away. NewLoad may get replaced immediately.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
+ Load->getValueType(0), SDLoc(Load),
+ Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(),
+ Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ if (Load->getNumValues() == 3) {
+ // PRE/POST_INC loads have 3 values.
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
+ NewLoad.getValue(2) };
+ CombineTo(Load, To, 3, true);
+ } else {
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
+ // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+ if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ if (SDValue Res = ReduceLoadWidth(N)) {
+ LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
+ ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
+ AddToWorklist(N);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
+ return SDValue(N, 0);
+ }
+ }
+
+ if (Level >= AfterLegalizeTypes) {
+ // Attempt to propagate the AND back up to the leaves which, if they're
+ // loads, can be combined to narrow loads and the AND node can be removed.
+ // Perform after legalization so that extend nodes will already be
+ // combined into the loads.
+ if (BackwardsPropagateMask(N, DAG)) {
+ return SDValue(N, 0);
+ }
+ }
+
+ if (SDValue Combined = visitANDLike(N0, N1, N))
+ return Combined;
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
+
+ // Masking the negated extension of a boolean is just the zero-extended
+ // boolean:
+ // and (sub 0, zext(bool X)), 1 --> zext(bool X)
+ // and (sub 0, sext(bool X)), 1 --> zext(bool X)
+ //
+ // Note: the SimplifyDemandedBits fold below can make an information-losing
+ // transform, and then we have no way to find this better fold.
+ if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
+ if (isNullOrNullSplat(N0.getOperand(0))) {
+ SDValue SubRHS = N0.getOperand(1);
+ if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
+ SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
+ return SubRHS;
+ if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
+ SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
+ }
+ }
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ (ISD::isEXTLoad(N0.getNode()) ||
+ (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned ExtBitSize = N1.getScalarValueSizeInBits();
+ unsigned MemBitSize = MemVT.getScalarSizeInBits();
+ APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
+ if (DAG.MaskedValueIsZero(N1, ExtBits) &&
+ ((!LegalOperations && LN0->isSimple()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
+ LN0->getBasePtr(), MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
+ if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
+ if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false))
+ return BSwap;
+ }
+
+ if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
+ return Shifts;
+
+ if (TLI.hasBitTest(N0, N1))
+ if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
+ return V;
+
+ return SDValue();
+}
+
+/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
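+/// e.g. for i16, a = 0x1234 produces 0x3412; for wider types the same bytes
+/// are produced in the low halfword by (srl (bswap a), bitwidth - 16).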
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+ if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
+ bool LookPassAnd0 = false;
+ bool LookPassAnd1 = false;
+ if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+ std::swap(N0, N1);
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::AND) {
+ if (!N0.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ // Also handle 0xffff since the LHS is guaranteed to have zeros there.
+ // This is needed for X86.
+ if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
+ N01C->getZExtValue() != 0xFFFF))
+ return SDValue();
+ N0 = N0.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ if (N1.getOpcode() == ISD::AND) {
+ if (!N1.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C || N11C->getZExtValue() != 0xFF)
+ return SDValue();
+ N1 = N1.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
+ return SDValue();
+
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N01C || !N11C)
+ return SDValue();
+ if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+ return SDValue();
+
+ // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+ SDValue N00 = N0->getOperand(0);
+ if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+ if (!N00.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+ if (!N001C || N001C->getZExtValue() != 0xFF)
+ return SDValue();
+ N00 = N00.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ SDValue N10 = N1->getOperand(0);
+ if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+ if (!N10.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+ // Also allow 0xFFFF since the bits will be shifted out. This is needed
+ // for X86.
+ if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
+ N101C->getZExtValue() != 0xFFFF))
+ return SDValue();
+ N10 = N10.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N00 != N10)
+ return SDValue();
+
+ // Make sure everything beyond the low halfword gets set to zero since the SRL
+ // 16 will clear the top bits.
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ if (DemandHighBits && OpSizeInBits > 16) {
+ // If the left-shift isn't masked out then the only way this is a bswap is
+ // if all bits beyond the low 8 are 0. In that case the entire pattern
+ // reduces to a left shift anyway: leave it for other parts of the combiner.
+ if (!LookPassAnd0)
+ return SDValue();
+
+ // However, if the right shift isn't masked out then it might be because
+ // it's not needed. See if we can spot that too.
+ if (!LookPassAnd1 &&
+ !DAG.MaskedValueIsZero(
+ N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
+ return SDValue();
+ }
+
+ SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
+ if (OpSizeInBits > 16) {
+ SDLoc DL(N);
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res,
+ DAG.getConstant(OpSizeInBits - 16, DL,
+ getShiftAmountTy(VT)));
+ }
+ return Res;
+}
+
+/// Return true if the specified node is an element that makes up a 32-bit
+/// packed halfword byteswap.
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
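+/// On a successful match the source node of the term is recorded in
+/// Parts[MaskByteOffset], e.g. the ((x & 0x0000ff00) >> 8) term fills
+/// Parts[1].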
+static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
+ if (!N.getNode()->hasOneUse())
+ return false;
+
+ unsigned Opc = N.getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+ return false;
+
+ SDValue N0 = N.getOperand(0);
+ unsigned Opc0 = N0.getOpcode();
+ if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
+ return false;
+
+ ConstantSDNode *N1C = nullptr;
+ // SHL or SRL: look upstream for AND mask operand
+ if (Opc == ISD::AND)
+ N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ else if (Opc0 == ISD::AND)
+ N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N1C)
+ return false;
+
+ unsigned MaskByteOffset;
+ switch (N1C->getZExtValue()) {
+ default:
+ return false;
+ case 0xFF: MaskByteOffset = 0; break;
+ case 0xFF00: MaskByteOffset = 1; break;
+ case 0xFFFF:
+ // In case demanded bits didn't clear the bits that will be shifted out.
+ // This is needed for X86.
+ if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
+ MaskByteOffset = 1;
+ break;
+ }
+ return false;
+ case 0xFF0000: MaskByteOffset = 2; break;
+ case 0xFF000000: MaskByteOffset = 3; break;
+ }
+
+ // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
+ if (Opc == ISD::AND) {
+ if (MaskByteOffset == 0 || MaskByteOffset == 2) {
+ // (x >> 8) & 0xff
+ // (x >> 8) & 0xff0000
+ if (Opc0 != ISD::SRL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else {
+ // (x << 8) & 0xff00
+ // (x << 8) & 0xff000000
+ if (Opc0 != ISD::SHL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+ } else if (Opc == ISD::SHL) {
+ // (x & 0xff) << 8
+ // (x & 0xff0000) << 8
+ if (MaskByteOffset != 0 && MaskByteOffset != 2)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else { // Opc == ISD::SRL
+ // (x & 0xff00) >> 8
+ // (x & 0xff000000) >> 8
+ if (MaskByteOffset != 1 && MaskByteOffset != 3)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+
+ if (Parts[MaskByteOffset])
+ return false;
+
+ Parts[MaskByteOffset] = N0.getOperand(0).getNode();
+ return true;
+}
+
+// Match 2 elements of a packed halfword bswap.
+static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
+ if (N.getOpcode() == ISD::OR)
+ return isBSwapHWordElement(N.getOperand(0), Parts) &&
+ isBSwapHWordElement(N.getOperand(1), Parts);
+
+ if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
+ ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
+ if (!C || C->getAPIntValue() != 16)
+ return false;
+ Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
+ return true;
+ }
+
+ return false;
+}
+
+/// Match a 32-bit packed halfword bswap. That is
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
+/// => (rotl (bswap x), 16)
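+/// e.g. x = 0xAABBCCDD gives 0xBBAADDCC, which is rotl(0xDDCCBBAA, 16) with
+/// bswap(x) == 0xDDCCBBAA.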
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+ if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Look for either
+ // (or (bswaphpair), (bswaphpair))
+ // (or (or (bswaphpair), (and)), (and))
+ // (or (or (and), (bswaphpair)), (and))
+ SDNode *Parts[4] = {};
+
+ if (isBSwapHWordPair(N0, Parts)) {
+ // (or (or (and), (and)), (or (and), (and)))
+ if (!isBSwapHWordPair(N1, Parts))
+ return SDValue();
+ } else if (N0.getOpcode() == ISD::OR) {
+ // (or (or (or (and), (and)), (and)), (and))
+ if (!isBSwapHWordElement(N1, Parts))
+ return SDValue();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
+ !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
+ return SDValue();
+ } else
+ return SDValue();
+
+ // Make sure the parts are all coming from the same node.
+ if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
+ SDValue(Parts[0], 0));
+
+ // Result of the bswap should be rotated by 16. If it's not legal, then
+ // do (x << 16) | (x >> 16).
+ SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
+ if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
+}
+
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an Or operation to a single value \see visitANDLike().
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
+ EVT VT = N1.getValueType();
+ SDLoc DL(N);
+
+ // fold (or x, undef) -> -1
+ if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
+ return DAG.getAllOnesConstant(DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
+ return V;
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
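+ // e.g. (or (and X, 0xF0), (and Y, 0x0F)) -> (and (or X, Y), 0xFF) when the
+ // low 4 bits of X and bits 4-7 of Y are known zero.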
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ if (const ConstantSDNode *N0O1C =
+ getAsNonOpaqueConstant(N0.getOperand(1))) {
+ if (const ConstantSDNode *N1O1C =
+ getAsNonOpaqueConstant(N1.getOperand(1))) {
+ const APInt &LHSMask = N0O1C->getAPIntValue();
+ const APInt &RHSMask = N1O1C->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, DL, VT, X,
+ DAG.getConstant(LHSMask | RHSMask, DL, VT));
+ }
+ }
+ }
+ }
+
+ // (or (and X, M), (and X, N)) -> (and X, (or M, N))
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(1), N1.getOperand(1));
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
+ }
+
+ return SDValue();
+}
+
+/// OR combines for which the commuted variant will be tried as well.
+static SDValue visitORCommutative(
+ SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
+ EVT VT = N0.getValueType();
+ if (N0.getOpcode() == ISD::AND) {
+ // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
+ if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
+
+ // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
+ if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // x | x --> x
+ if (N0 == N1)
+ return N0;
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (or x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+
+ // fold (or x, -1) -> -1, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ // do not return N0, because an undef node may exist in N0
+ return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ // do not return N1, because an undef node may exist in N1
+ return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
+
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
+ // Do this only if the resulting shuffle is legal.
+ if (isa<ShuffleVectorSDNode>(N0) &&
+ isa<ShuffleVectorSDNode>(N1) &&
+ // Avoid folding a node with illegal type.
+ TLI.isTypeLegal(VT)) {
+ bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
+ bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
+ bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+ bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
+ // Ensure both shuffles have a zero input.
+ if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
+ assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
+ assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
+ const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
+ const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
+ bool CanFold = true;
+ int NumElts = VT.getVectorNumElements();
+ SmallVector<int, 4> Mask(NumElts);
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M0 = SV0->getMaskElt(i);
+ int M1 = SV1->getMaskElt(i);
+
+ // Determine if either index is pointing to a zero vector.
+ bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
+ bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
+
+ // If one element is zero and the other side is undef, keep undef.
+ // This also handles the case that both are undef.
+ if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
+ Mask[i] = -1;
+ continue;
+ }
+
+ // Make sure only one of the elements is zero.
+ if (M0Zero == M1Zero) {
+ CanFold = false;
+ break;
+ }
+
+ assert((M0 >= 0 || M1 >= 0) && "Undef index!");
+
+ // We have a zero and non-zero element. If the non-zero came from
+ // SV0 make the index a LHS index. If it came from SV1, make it
+ // a RHS index. We need to mod by NumElts because we don't care
+ // which operand it came from in the original shuffles.
+ Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
+ }
+
+ if (CanFold) {
+ SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
+ SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
+
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
+ Mask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
+ }
+ }
+ }
+ }
+
+ // fold (or c1, c2) -> c1|c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+ // fold (or x, -1) -> -1
+ if (isAllOnesConstant(N1))
+ return N1;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+
+ if (SDValue Combined = visitORLike(N0, N1, N))
+ return Combined;
+
+ // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
+ if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
+ return BSwap;
+ if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
+ return BSwap;
+
+ // reassociate or
+ if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
+ return ROR;
+
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) != 0 or c1/c2 are undef.
+ auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
+ return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
+ };
+ if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
+ if (SDValue COR = DAG.FoldConstantArithmetic(
+ ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
+ SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
+ AddToWorklist(IOR.getNode());
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
+ }
+ }
+
+ if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
+ return Combined;
+ if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
+ return Combined;
+
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
+
+ // See if this is some rotate idiom.
+ if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
+ return Rot;
+
+ if (SDValue Load = MatchLoadCombine(N))
+ return Load;
+
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If OR can be rewritten into ADD, try combines based on ADD.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
+ return SDValue();
+}
+
+static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND &&
+ DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ return Op.getOperand(0);
+ }
+ return Op;
+}
+
+/// Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
+ SDValue &Mask) {
+ Op = stripConstantMask(DAG, Op, Mask);
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+ return false;
+}
+
+/// Helper function for visitOR to extract the needed side of a rotate idiom
+/// from a shl/srl/mul/udiv. This is meant to handle cases where
+/// InstCombine merged some outside op with one of the shifts from
+/// the rotate pattern.
+/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
+/// Otherwise, returns an expansion of \p ExtractFrom based on the following
+/// patterns:
+///
+/// (or (add v v) (shrl v bitwidth-1)):
+/// expands (add v v) -> (shl v 1)
+///
+/// (or (mul v c0) (shrl (mul v c1) c2)):
+/// expands (mul v c0) -> (shl (mul v c1) c3)
+///
+/// (or (udiv v c0) (shl (udiv v c1) c2)):
+/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
+///
+/// (or (shl v c0) (shrl (shl v c1) c2)):
+/// expands (shl v c0) -> (shl (shl v c1) c3)
+///
+/// (or (shrl v c0) (shl (shrl v c1) c2)):
+/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
+///
+/// Such that in all cases, c3+c2==bitwidth(op v c1).
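+///
+/// e.g. for i32, (or (mul v, 8), (srl (mul v, 2), 30)) allows expanding
+/// (mul v, 8) into (shl (mul v, 2), 2), since 2 + 30 == 32.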
+static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
+ SDValue ExtractFrom, SDValue &Mask,
+ const SDLoc &DL) {
+ assert(OppShift && ExtractFrom && "Empty SDValue");
+ assert(
+ (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
+ "Existing shift must be valid as a rotate half");
+
+ ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
+
+ // Value and Type of the shift.
+ SDValue OppShiftLHS = OppShift.getOperand(0);
+ EVT ShiftedVT = OppShiftLHS.getValueType();
+
+ // Amount of the existing shift.
+ ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
+
+ // (add v v) -> (shl v 1)
+ if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
+ ExtractFrom.getOpcode() == ISD::ADD &&
+ ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
+ ExtractFrom.getOperand(0) == OppShiftLHS &&
+ OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
+ return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
+ DAG.getShiftAmountConstant(1, ShiftedVT, DL));
+
+ // Preconditions:
+ // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
+ //
+ // Find opcode of the needed shift to be extracted from (op0 v c0).
+ unsigned Opcode = ISD::DELETED_NODE;
+ bool IsMulOrDiv = false;
+ // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
+ // opcode or its arithmetic (mul or udiv) variant.
+ auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
+ IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
+ if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
+ return false;
+ Opcode = NeededShift;
+ return true;
+ };
+ // op0 must be either the needed shift opcode or the mul/udiv equivalent
+ // that the needed shift can be extracted from.
+ if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
+ (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
+ return SDValue();
+
+ // op0 must be the same opcode on both sides, have the same LHS argument,
+ // and produce the same value type.
+ if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
+ OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
+ ShiftedVT != ExtractFrom.getValueType())
+ return SDValue();
+
+ // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
+ ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
+ // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
+ ConstantSDNode *ExtractFromCst =
+ isConstOrConstSplat(ExtractFrom.getOperand(1));
+ // TODO: We should be able to handle non-uniform constant vectors for these
+ // values.
+ // Check that we have constant values.
+ if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
+ !OppLHSCst || !OppLHSCst->getAPIntValue() ||
+ !ExtractFromCst || !ExtractFromCst->getAPIntValue())
+ return SDValue();
+
+ // Compute the shift amount we need to extract to complete the rotate.
+ const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
+ if (OppShiftCst->getAPIntValue().ugt(VTWidth))
+ return SDValue();
+ APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
+ // Normalize the bitwidth of the two mul/udiv/shift constant operands.
+ APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
+ APInt OppLHSAmt = OppLHSCst->getAPIntValue();
+ zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
+
+ // Now try extract the needed shift from the ExtractFrom op and see if the
+ // result matches up with the existing shift's LHS op.
+ if (IsMulOrDiv) {
+ // Op to extract from is a mul or udiv by a constant.
+ // Check:
+ // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
+ // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
+ const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
+ NeededShiftAmt.getZExtValue());
+ APInt ResultAmt;
+ APInt Rem;
+ APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
+ if (Rem != 0 || ResultAmt != OppLHSAmt)
+ return SDValue();
+ } else {
+ // Op to extract from is a shift by a constant.
+ // Check:
+ // c2 - (bitwidth(op0 v c0) - c1) == c0
+ if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
+ ExtractFromAmt.getBitWidth()))
+ return SDValue();
+ }
+
+ // Return the expanded shift op that should allow a rotate to be formed.
+ EVT ShiftVT = OppShift.getOperand(1).getValueType();
+ EVT ResVT = ExtractFrom.getValueType();
+ SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
+ return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
+}
+
+// Return true if we can prove that, whenever Neg and Pos are both in the
+// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
+// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
+//
+// (or (shift1 X, Neg), (shift2 X, Pos))
+//
+// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
+// in direction shift1 by Neg. The range [0, EltSize) means that we only need
+// to consider shift amounts with defined behavior.
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
+ SelectionDAG &DAG) {
+ // If EltSize is a power of 2 then:
+ //
+ // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
+ // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
+ //
+ // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
+ // for the stronger condition:
+ //
+ // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
+ //
+ // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
+ // we can just replace Neg with Neg' for the rest of the function.
+ //
+ // In other cases we check for the even stronger condition:
+ //
+ // Neg == EltSize - Pos [B]
+ //
+ // for all Neg and Pos. Note that the (or ...) then invokes undefined
+ // behavior if Pos == 0 (and consequently Neg == EltSize).
+ //
+ // We could actually use [A] whenever EltSize is a power of 2, but the
+ // only extra cases that it would match are those uninteresting ones
+ // where Neg and Pos are never in range at the same time. E.g. for
+ // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
+ // as well as (sub 32, Pos), but:
+ //
+ // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
+ //
+ // always invokes undefined behavior for 32-bit X.
+ //
+ // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
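+ //
+ // e.g. for EltSize == 32, both Neg = (sub 32, Pos) and
+ // Neg = (and (sub 0, Pos), 31) satisfy the condition.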
+ unsigned MaskLoBits = 0;
+ if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+ if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
+ KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
+ unsigned Bits = Log2_64(EltSize);
+ if (NegC->getAPIntValue().getActiveBits() <= Bits &&
+ ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
+ Neg = Neg.getOperand(0);
+ MaskLoBits = Bits;
+ }
+ }
+ }
+
+ // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
+ if (Neg.getOpcode() != ISD::SUB)
+ return false;
+ ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
+ if (!NegC)
+ return false;
+ SDValue NegOp1 = Neg.getOperand(1);
+
+ // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
+ // Pos'. The truncation is redundant for the purpose of the equality.
+ if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
+ KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
+ if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
+ ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
+ MaskLoBits))
+ Pos = Pos.getOperand(0);
+ }
+ }
+
+ // The condition we need is now:
+ //
+ // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
+ //
+ // If NegOp1 == Pos then we need:
+ //
+ // EltSize & Mask == NegC & Mask
+ //
+ // (because "x & Mask" is a truncation and distributes through subtraction).
+ APInt Width;
+ if (Pos == NegOp1)
+ Width = NegC->getAPIntValue();
+
+ // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
+ // Then the condition we want to prove becomes:
+ //
+ // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
+ //
+ // which, again because "x & Mask" is a truncation, becomes:
+ //
+ // NegC & Mask == (EltSize - PosC) & Mask
+ // EltSize & Mask == (NegC + PosC) & Mask
+ else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ Width = PosC->getAPIntValue() + NegC->getAPIntValue();
+ else
+ return false;
+ } else
+ return false;
+
+ // Now we just need to check that EltSize & Mask == Width & Mask.
+ if (MaskLoBits)
+ // EltSize & Mask is 0 since Mask is EltSize - 1.
+ return Width.getLoBits(MaskLoBits) == 0;
+ return Width == EltSize;
+}
+
+// A subroutine of MatchRotate used once we have found an OR of two opposite
+// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
+// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
+// former being preferred if supported. InnerPos and InnerNeg are Pos and
+// Neg with outer conversions stripped away.
+SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
+ SDValue Neg, SDValue InnerPos,
+ SDValue InnerNeg, unsigned PosOpcode,
+ unsigned NegOpcode, const SDLoc &DL) {
+ // fold (or (shl x, (*ext y)),
+ // (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y) or (rotr x, (sub 32, y))
+ //
+ // fold (or (shl x, (*ext (sub 32, y))),
+ // (srl x, (*ext y))) ->
+ // (rotr x, y) or (rotl x, (sub 32, y))
+ EVT VT = Shifted.getValueType();
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
+ bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
+ return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
+ HasPos ? Pos : Neg);
+ }
+
+ return SDValue();
+}
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
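+// e.g. for i32, (or (shl x, 5), (srl x, 27)) becomes (rotl x, 5), or
+// (rotr x, 27) if only ROTR is legal.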
+SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
+ // Must be a legal type. Expanded and promoted types won't work with rotates.
+ EVT VT = LHS.getValueType();
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // The target must have at least one rotate flavor.
+ bool HasROTL = hasOperation(ISD::ROTL, VT);
+ bool HasROTR = hasOperation(ISD::ROTR, VT);
+ if (!HasROTL && !HasROTR)
+ return SDValue();
+
+ // Check for truncated rotate.
+ if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
+ assert(LHS.getValueType() == RHS.getValueType());
+ if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
+ }
+ }
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDValue LHSShift; // The shift.
+ SDValue LHSMask; // AND value if any.
+ matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
+
+ SDValue RHSShift; // The shift.
+ SDValue RHSMask; // AND value if any.
+ matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
+
+ // If neither side matched a rotate half, bail.
+ if (!LHSShift && !RHSShift)
+ return SDValue();
+
+ // InstCombine may have combined a constant shl, srl, mul, or udiv with one
+ // side of the rotate, so try to handle that here. In all cases we need to
+ // pass the matched shift from the opposite side to compute the opcode and
+ // needed shift amount to extract. We still want to do this if both sides
+ // matched a rotate half because one half may be a potential overshift that
+ // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
+ // single one).
+
+ // Have LHS side of the rotate, try to extract the needed shift from the RHS.
+ if (LHSShift)
+ if (SDValue NewRHSShift =
+ extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
+ RHSShift = NewRHSShift;
+ // Have RHS side of the rotate, try to extract the needed shift from the LHS.
+ if (RHSShift)
+ if (SDValue NewLHSShift =
+ extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
+ LHSShift = NewLHSShift;
+
+ // If a side is still missing, nothing else we can do.
+ if (!RHSShift || !LHSShift)
+ return SDValue();
+
+ // At this point we've matched or extracted a shift op on each side.
+
+ if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+ return SDValue(); // Not shifting the same value.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return SDValue(); // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask, RHSMask);
+ }
+
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ SDValue LHSShiftArg = LHSShift.getOperand(0);
+ SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftArg = RHSShift.getOperand(0);
+ SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+ auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
+ };
+ if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
+ SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
+
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.getNode() || RHSMask.getNode()) {
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+ SDValue Mask = AllOnes;
+
+ if (LHSMask.getNode()) {
+ SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
+ }
+ if (RHSMask.getNode()) {
+ SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
+ }
+
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
+ }
+
+ return Rot;
+ }
+
+ // If there is a mask here, and we have a variable shift, we can't be sure
+ // that we're masking out the right stuff.
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return SDValue();
+
+ // If the shift amount is sign/zext/any-extended just peel it off.
+ SDValue LExtOp0 = LHSShiftAmt;
+ SDValue RExtOp0 = RHSShiftAmt;
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ LExtOp0 = LHSShiftAmt.getOperand(0);
+ RExtOp0 = RHSShiftAmt.getOperand(0);
+ }
+
+ SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+ LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+ if (TryL)
+ return TryL;
+
+ SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+ RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+ if (TryR)
+ return TryR;
+
+ return SDValue();
+}
+
+namespace {
+
+/// Represents known origin of an individual byte in load combine pattern. The
+/// value of the byte is either constant zero or comes from memory.
+struct ByteProvider {
+ // For constant zero providers Load is set to nullptr. For memory providers
+ // Load represents the node which loads the byte from memory.
+ // ByteOffset is the offset of the byte in the value produced by the load.
+ LoadSDNode *Load = nullptr;
+ unsigned ByteOffset = 0;
+
+ ByteProvider() = default;
+
+ static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
+ return ByteProvider(Load, ByteOffset);
+ }
+
+ static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
+
+ bool isConstantZero() const { return !Load; }
+ bool isMemory() const { return Load; }
+
+ bool operator==(const ByteProvider &Other) const {
+ return Other.Load == Load && Other.ByteOffset == ByteOffset;
+ }
+
+private:
+ ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
+ : Load(Load), ByteOffset(ByteOffset) {}
+};
+
+} // end anonymous namespace
+
+/// Recursively traverses the expression calculating the origin of the requested
+/// byte of the given value. Returns None if the provider can't be calculated.
+///
+/// For all values except the root of the expression, verifies that the value
+/// has exactly one use; if not, returns None. This way, if the origin of the
+/// byte is returned, it is guaranteed that the values which contribute to the
+/// byte are not used outside of this expression.
+///
+/// Because the parts of the expression are not allowed to have more than one
+/// use this function iterates over trees, not DAGs. So it never visits the same
+/// node more than once.
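+///
+/// e.g. for (or (zext (load i8 %p0)), (shl (zext (load i8 %p1)), 8)), byte 0
+/// (the least significant) is provided by the load of %p0 and byte 1 by the
+/// load of %p1 (%p0 and %p1 being illustrative operands).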
+static const Optional<ByteProvider>
+calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
+ bool Root = false) {
+ // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
+ if (Depth == 10)
+ return None;
+
+ if (!Root && !Op.hasOneUse())
+ return None;
+
+ assert(Op.getValueType().isScalarInteger() && "can't handle other types");
+ unsigned BitWidth = Op.getValueSizeInBits();
+ if (BitWidth % 8 != 0)
+ return None;
+ unsigned ByteWidth = BitWidth / 8;
+ assert(Index < ByteWidth && "invalid index requested");
+ (void) ByteWidth;
+
+ switch (Op.getOpcode()) {
+ case ISD::OR: {
+ auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
+ if (!LHS)
+ return None;
+ auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
+ if (!RHS)
+ return None;
+
+ if (LHS->isConstantZero())
+ return RHS;
+ if (RHS->isConstantZero())
+ return LHS;
+ return None;
+ }
+ case ISD::SHL: {
+ auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!ShiftOp)
+ return None;
+
+ uint64_t BitShift = ShiftOp->getZExtValue();
+ if (BitShift % 8 != 0)
+ return None;
+ uint64_t ByteShift = BitShift / 8;
+
+ return Index < ByteShift
+ ? ByteProvider::getConstantZero()
+ : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
+ Depth + 1);
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ SDValue NarrowOp = Op->getOperand(0);
+ unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return None;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return Op.getOpcode() == ISD::ZERO_EXTEND
+ ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+ : None;
+ return calculateByteProvider(NarrowOp, Index, Depth + 1);
+ }
+ case ISD::BSWAP:
+ return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
+ Depth + 1);
+ case ISD::LOAD: {
+ auto L = cast<LoadSDNode>(Op.getNode());
+ if (!L->isSimple() || L->isIndexed())
+ return None;
+
+ unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return None;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return L->getExtensionType() == ISD::ZEXTLOAD
+ ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+ : None;
+ return ByteProvider::getMemory(L, Index);
+ }
+ }
+
+ return None;
+}
+
+static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
+ return i;
+}
+
+static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
+ return BW - i - 1;
+}
+
+// Check if the byte offsets we are looking at match either a big or a little
+// endian value. Return true for big endian, false for little endian, and
+// None if the match failed.
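+// For example (illustrative): with FirstOffset = 100 and
+// ByteOffsets = {103, 102, 101, 100}, the normalized offsets {3, 2, 1, 0}
+// match the big endian layout of a 4-byte value, so this returns true.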
+static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
+ int64_t FirstOffset) {
+  // Endianness can only be determined when there are at least 2 bytes.
+ unsigned Width = ByteOffsets.size();
+ if (Width < 2)
+ return None;
+
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned i = 0; i < Width; i++) {
+ int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+ LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
+ BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
+ if (!BigEndian && !LittleEndian)
+ return None;
+ }
+
+  assert((BigEndian != LittleEndian) &&
+         "It should be either big endian or little endian");
+ return BigEndian;
+}
+
+static SDValue stripTruncAndExt(SDValue Value) {
+ switch (Value.getOpcode()) {
+ case ISD::TRUNCATE:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND:
+ return stripTruncAndExt(Value.getOperand(0));
+ }
+ return Value;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
+ // Collect all the stores in the chain.
+ SDValue Chain;
+ SmallVector<StoreSDNode *, 8> Stores;
+ for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+ // TODO: Allow unordered atomics when wider type is legal (see D66309)
+ if (Store->getMemoryVT() != MVT::i8 ||
+ !Store->isSimple() || Store->isIndexed())
+ return SDValue();
+ Stores.push_back(Store);
+ Chain = Store->getChain();
+ }
+  // Handle simple types only.
+ unsigned Width = Stores.size();
+ EVT VT = EVT::getIntegerVT(
+ *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
+ return SDValue();
+
+  // Check if all the bytes of the combined value we are looking at are
+  // stored to the same base address. Collect byte offsets from the Base
+  // address into ByteOffsets.
+ SDValue CombinedValue;
+ SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
+ int64_t FirstOffset = INT64_MAX;
+ StoreSDNode *FirstStore = nullptr;
+ Optional<BaseIndexOffset> Base;
+ for (auto Store : Stores) {
+    // All the stores store a different byte of the CombinedValue. A truncate
+    // is required to get that byte's value.
+ SDValue Trunc = Store->getValue();
+ if (Trunc.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+    // A shift operation is required to get the right byte offset, except for
+    // the first byte.
+ int64_t Offset = 0;
+ SDValue Value = Trunc.getOperand(0);
+ if (Value.getOpcode() == ISD::SRL ||
+ Value.getOpcode() == ISD::SRA) {
+ ConstantSDNode *ShiftOffset =
+ dyn_cast<ConstantSDNode>(Value.getOperand(1));
+ // Trying to match the following pattern. The shift offset must be
+ // a constant and a multiple of 8. It is the byte offset in "y".
+ //
+ // x = srl y, offset
+ // i8 z = trunc x
+ // store z, ...
+ if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
+ return SDValue();
+
+ Offset = ShiftOffset->getSExtValue()/8;
+ Value = Value.getOperand(0);
+ }
+
+ // Stores must share the same combined value with different offsets.
+ if (!CombinedValue)
+ CombinedValue = Value;
+ else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
+ return SDValue();
+
+    // The trunc and all the extend operations should be stripped to get the
+    // real value being stored.
+ else if (CombinedValue.getValueType() != VT) {
+ if (Value.getValueType() == VT ||
+ Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
+ CombinedValue = Value;
+ // Give up if the combined value type is smaller than the store size.
+ if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
+ return SDValue();
+ }
+
+ // Stores must share the same base address
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
+ int64_t ByteOffsetFromBase = 0;
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+ return SDValue();
+
+ // Remember the first byte store
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstStore = Store;
+ FirstOffset = ByteOffsetFromBase;
+ }
+    // Map the offset in the store to the offset in the combined value, and
+    // return early if that offset has already been set.
+ if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
+ return SDValue();
+ ByteOffsets[Offset] = ByteOffsetFromBase;
+ }
+
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+ assert(FirstStore && "First store must be set");
+
+  // Check if the bytes of the combined value we are looking at match either
+  // a big or a little endian value store.
+ Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ if (!IsBigEndian.hasValue())
+ return SDValue();
+
+  // The node we are looking at matches the pattern; check if we can replace
+  // it with a single store, plus a bswap if needed.
+
+  // If the store needs a byte swap, check if the target supports it.
+ bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
+
+ // Before legalize we can introduce illegal bswaps which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // store and byte shuffling instead of several stores and byte shuffling.
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Check that a store of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed =
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *FirstStore->getMemOperand(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ if (VT != CombinedValue.getValueType()) {
+ assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
+ "Get unexpected store value to combine");
+ CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
+ CombinedValue);
+ }
+
+ if (NeedsBswap)
+ CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
+
+ SDValue NewStore =
+ DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
+ FirstStore->getPointerInfo(), FirstStore->getAlignment());
+
+ // Rely on other DAG combine rules to remove the other individual stores.
+ DAG.ReplaceAllUsesWith(N, NewStore.getNode());
+ return NewStore;
+}
+
+/// Match a pattern where a wide type scalar value is loaded by several narrow
+/// loads and combined by shifts and ors. Fold it into a single load or a load
+/// and a BSWAP if the target supports it.
+///
+/// Assuming little endian target:
+/// i8 *a = ...
+/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+/// =>
+/// i32 val = *((i32)a)
+///
+/// i8 *a = ...
+/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+/// =>
+/// i32 val = BSWAP(*((i32)a))
+///
+/// TODO: This rule matches complex patterns with OR node roots and doesn't
+/// interact well with the worklist mechanism. When a part of the pattern is
+/// updated (e.g. one of the loads), its direct users are put into the
+/// worklist, but the root node of the pattern which triggers the load combine
+/// is not necessarily a direct user of the changed node. For example, once
+/// the address of the t28 load is reassociated, the load combine won't be
+/// triggered:
+/// t25: i32 = add t4, Constant:i32<2>
+/// t26: i64 = sign_extend t25
+/// t27: i64 = add t2, t26
+/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
+/// t29: i32 = zero_extend t28
+/// t32: i32 = shl t29, Constant:i8<8>
+/// t33: i32 = or t23, t32
+/// As a possible fix, visitLoad can check whether the load can be a part of a
+/// load combine pattern and add the corresponding OR roots to the worklist.
+SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR &&
+ "Can only match load combining against OR nodes");
+
+ // Handles simple types only
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+ unsigned ByteWidth = VT.getSizeInBits() / 8;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // Before legalize we can introduce illegal loads that are too wide and will
+  // later be split into legal-sized loads. This enables us to combine
+  // i64-by-i8 patterns into a couple of i32 loads on 32-bit targets.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
+ bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
+ auto MemoryByteOffset = [&] (ByteProvider P) {
+ assert(P.isMemory() && "Must be a memory byte provider");
+ unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
+    assert(LoadBitWidth % 8 == 0 &&
+           "can only analyze providers for individual bytes, not bits");
+ unsigned LoadByteWidth = LoadBitWidth / 8;
+ return IsBigEndianTarget
+ ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
+ : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ };
+
+ Optional<BaseIndexOffset> Base;
+ SDValue Chain;
+
+ SmallPtrSet<LoadSDNode *, 8> Loads;
+ Optional<ByteProvider> FirstByteProvider;
+ int64_t FirstOffset = INT64_MAX;
+
+  // Check if all the bytes of the OR we are looking at are loaded from the
+  // same base address. Collect byte offsets from the Base address in
+  // ByteOffsets.
+ SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
+ for (unsigned i = 0; i < ByteWidth; i++) {
+ auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
+ if (!P || !P->isMemory()) // All the bytes must be loaded from memory
+ return SDValue();
+
+ LoadSDNode *L = P->Load;
+ assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
+ !L->isIndexed() &&
+ "Must be enforced by calculateByteProvider");
+ assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
+
+ // All loads must share the same chain
+ SDValue LChain = L->getChain();
+ if (!Chain)
+ Chain = LChain;
+ else if (Chain != LChain)
+ return SDValue();
+
+ // Loads must share the same base address
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
+ int64_t ByteOffsetFromBase = 0;
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+ return SDValue();
+
+ // Calculate the offset of the current byte from the base address
+ ByteOffsetFromBase += MemoryByteOffset(*P);
+ ByteOffsets[i] = ByteOffsetFromBase;
+
+ // Remember the first byte load
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstByteProvider = P;
+ FirstOffset = ByteOffsetFromBase;
+ }
+
+ Loads.insert(L);
+ }
+ assert(!Loads.empty() && "All the bytes of the value must be loaded from "
+ "memory, so there must be at least one load which produces the value");
+ assert(Base && "Base address of the accessed memory location must be set");
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+
+  // Check if the bytes of the OR we are looking at match either a big or a
+  // little endian value load.
+ Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ if (!IsBigEndian.hasValue())
+ return SDValue();
+
+ assert(FirstByteProvider && "must be set");
+
+  // Ensure that the first byte is loaded from the zero offset of the first
+  // load, so that the combined value can be loaded from the first load's
+  // address.
+ if (MemoryByteOffset(*FirstByteProvider) != 0)
+ return SDValue();
+ LoadSDNode *FirstLoad = FirstByteProvider->Load;
+
+  // The node we are looking at matches the pattern; check if we can replace
+  // it with a single load, plus a bswap if needed.
+
+  // If the load needs a byte swap, check if the target supports it.
+ bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
+
+ // Before legalize we can introduce illegal bswaps which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // load and byte shuffling instead of several loads and byte shuffling.
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Check that a load of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ VT, *FirstLoad->getMemOperand(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ SDValue NewLoad =
+ DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
+
+ // Transfer chain users from old loads to the new load.
+ for (LoadSDNode *L : Loads)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
+
+ return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
+}
+
+// If the target has andn, bsl, or a similar bit-select instruction,
+// we want to unfold masked merge, with canonical pattern of:
+// | A | |B|
+// ((x ^ y) & m) ^ y
+// | D |
+// Into:
+// (x & m) | (y & ~m)
+// If y is a constant, and the 'andn' does not work with immediates,
+// we unfold into a different pattern:
+// ~(~x & m) & (m | y)
+// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
+// the very least that breaks andnpd / andnps patterns, and because those
+// patterns are simplified in IR and shouldn't be created in the DAG.
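+// For example (illustrative), with i8 operands and m = 0x0F,
+//   ((x ^ y) & 0x0F) ^ y
+// selects the low nibble from x and the high nibble from y, i.e. it computes
+//   (x & 0x0F) | (y & 0xF0).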
+SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
+ assert(N->getOpcode() == ISD::XOR);
+
+ // Don't touch 'not' (i.e. where y = -1).
+ if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // There are 3 commutable operators in the pattern,
+ // so we have to deal with 8 possible variants of the basic pattern.
+ SDValue X, Y, M;
+ auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
+ if (And.getOpcode() != ISD::AND || !And.hasOneUse())
+ return false;
+ SDValue Xor = And.getOperand(XorIdx);
+ if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
+ return false;
+ SDValue Xor0 = Xor.getOperand(0);
+ SDValue Xor1 = Xor.getOperand(1);
+ // Don't touch 'not' (i.e. where y = -1).
+ if (isAllOnesOrAllOnesSplat(Xor1))
+ return false;
+ if (Other == Xor0)
+ std::swap(Xor0, Xor1);
+ if (Other != Xor1)
+ return false;
+ X = Xor0;
+ Y = Xor1;
+ M = And.getOperand(XorIdx ? 0 : 1);
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
+ !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
+ return SDValue();
+
+  // Don't do anything if the mask is constant. This should not be reachable.
+  // InstCombine should have already unfolded this pattern, and DAGCombiner
+  // probably shouldn't produce it either.
+ if (isa<ConstantSDNode>(M.getNode()))
+ return SDValue();
+
+ // We can transform if the target has AndNot
+ if (!TLI.hasAndNot(M))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // If Y is a constant, check that 'andn' works with immediates.
+ if (!TLI.hasAndNot(Y)) {
+ assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
+ // If not, we need to do a bit more work to make sure andn is still used.
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
+ SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
+ SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
+ return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
+ }
+
+ SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
+ SDValue NotM = DAG.getNOT(DL, M, VT);
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
+
+ return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (xor x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ SDLoc DL(N);
+ if (N0.isUndef() && N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+ // fold (xor x, undef) -> undef
+ if (N0.isUndef())
+ return N0;
+ if (N1.isUndef())
+ return N1;
+ // fold (xor c1, c2) -> c1^c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+ // fold (xor x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // reassociate xor
+ if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
+ return RXOR;
+
+ // fold !(x cc y) -> (x !cc y)
+ unsigned N0Opcode = N0.getOpcode();
+ SDValue LHS, RHS, CC;
+ if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ LHS.getValueType().isInteger());
+ if (!LegalOperations ||
+ TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
+ switch (N0Opcode) {
+ default:
+ llvm_unreachable("Unhandled SetCC Equivalent!");
+ case ISD::SETCC:
+ return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
+ case ISD::SELECT_CC:
+ return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
+ N0.getOperand(3), NotCC);
+ }
+ }
+ }
+
+ // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+ if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+ SDValue V = N0.getOperand(0);
+ SDLoc DL0(N0);
+ V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
+ DAG.getConstant(1, DL0, V.getValueType()));
+ AddToWorklist(V.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
+ }
+
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+ if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
+ (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
+ SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+ if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
+ unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
+ N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+ N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+ AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+ return DAG.getNode(NewOpcode, DL, VT, N00, N01);
+ }
+ }
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+ if (isAllOnesConstant(N1) && N0.hasOneUse() &&
+ (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
+ SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+ if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
+ unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
+ N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+ N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+ AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+ return DAG.getNode(NewOpcode, DL, VT, N00, N01);
+ }
+ }
+
+ // fold (not (neg x)) -> (add X, -1)
+ // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
+ // Y is a constant or the subtract has a single use.
+ if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
+ isNullConstant(N0.getOperand(0))) {
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ }
+
+ // fold (xor (and x, y), y) -> (and (not x), y)
+ if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
+ SDValue X = N0.getOperand(0);
+ SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
+ AddToWorklist(NotX.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
+ }
+
+ if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
+ ConstantSDNode *XorC = isConstOrConstSplat(N1);
+ ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ if (XorC && ShiftC) {
+      // Don't crash on an oversized shift. We cannot guarantee that a bogus
+      // shift has been simplified to undef.
+ uint64_t ShiftAmt = ShiftC->getLimitedValue();
+ if (ShiftAmt < BitWidth) {
+ APInt Ones = APInt::getAllOnesValue(BitWidth);
+ Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
+ if (XorC->getAPIntValue() == Ones) {
+ // If the xor constant is a shifted -1, do a 'not' before the shift:
+ // xor (X << ShiftC), XorC --> (not X) << ShiftC
+ // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
+ SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
+ return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
+ }
+ }
+ }
+ }
+
+ // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
+ SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
+ if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
+ SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
+ SDValue S0 = S.getOperand(0);
+ if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, DL, VT, S0);
+ }
+ }
+ }
+
+ // fold (xor x, x) -> 0
+ if (N0 == N1)
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
+ // Here is a concrete example of this equivalence:
+ // i16 x == 14
+ // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
+ // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
+ //
+ // =>
+ //
+ // i16 ~1 == 0b1111111111111110
+ // i16 rol(~1, 14) == 0b1011111111111111
+ //
+ // Some additional tips to help conceptualize this transform:
+ // - Try to see the operation as placing a single zero in a value of all ones.
+ // - There exists no value for x which would allow the result to contain zero.
+ // - Values of x larger than the bitwidth are undefined and do not require a
+ // consistent result.
+ // - Pushing the zero left requires shifting one bits in from the right.
+ // A rotate left of ~1 is a nice way of achieving the desired result.
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
+ isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
+ return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
+ N0.getOperand(1));
+ }
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0Opcode == N1.getOpcode())
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
+
+ // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
+ if (SDValue MM = unfoldMaskedMerge(N))
+ return MM;
+
+ // Simplify the expression using non-local knowledge.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand with identical opcode, we may be able to convert
+/// that into 2 independent shifts followed by the logic op. This is a
+/// throughput improvement.
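+///
+/// For example (illustrative), on i32:
+///   shl (and (shl X, 3), Y), 4 --> and (shl X, 7), (shl Y, 4)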
+static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
+ // Match a one-use bitwise logic op.
+ SDValue LogicOp = Shift->getOperand(0);
+ if (!LogicOp.hasOneUse())
+ return SDValue();
+
+ unsigned LogicOpcode = LogicOp.getOpcode();
+ if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
+ LogicOpcode != ISD::XOR)
+ return SDValue();
+
+ // Find a matching one-use shift by constant.
+ unsigned ShiftOpcode = Shift->getOpcode();
+ SDValue C1 = Shift->getOperand(1);
+ ConstantSDNode *C1Node = isConstOrConstSplat(C1);
+ assert(C1Node && "Expected a shift with constant operand");
+ const APInt &C1Val = C1Node->getAPIntValue();
+ auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
+ const APInt *&ShiftAmtVal) {
+ if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
+ return false;
+
+ ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
+ if (!ShiftCNode)
+ return false;
+
+ // Capture the shifted operand and shift amount value.
+ ShiftOp = V.getOperand(0);
+ ShiftAmtVal = &ShiftCNode->getAPIntValue();
+
+ // Shift amount types do not have to match their operand type, so check that
+ // the constants are the same width.
+ if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
+ return false;
+
+ // The fold is not valid if the sum of the shift values exceeds bitwidth.
+ if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
+ return false;
+
+ return true;
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ SDValue X, Y;
+ const APInt *C0Val;
+ if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
+ Y = LogicOp.getOperand(1);
+ else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
+ Y = LogicOp.getOperand(0);
+ else
+ return SDValue();
+
+ // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+ SDLoc DL(Shift);
+ EVT VT = Shift->getValueType(0);
+ EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
+ SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
+ SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
+ SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
+ return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+}
+
+/// Handle transforms common to the three shifts, when the shift amount is a
+/// constant.
+/// We are looking for: (shift being one of shl/sra/srl)
+/// shift (binop X, C0), C1
+/// And want to transform into:
+/// binop (shift X, C1), (shift C0, C1)
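+///
+/// For example (illustrative), on i32:
+///   shl (add X, 5), 2 --> add (shl X, 2), 20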
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
+ assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
+
+ // Do not turn a 'not' into a regular xor.
+ if (isBitwiseNot(N->getOperand(0)))
+ return SDValue();
+
+ // The inner binop must be one-use, since we want to replace it.
+ SDValue LHS = N->getOperand(0);
+ if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
+ return SDValue();
+
+ // TODO: This is limited to early combining because it may reveal regressions
+ // otherwise. But since we just checked a target hook to see if this is
+ // desirable, that should have filtered out cases where this interferes
+ // with some other pattern matching.
+ if (!LegalTypes)
+ if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+ return R;
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
+ switch (LHS.getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::AND:
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant and not opaque as well.
+ ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
+ if (!BinOpCst)
+ return SDValue();
+
+  // FIXME: disable this unless the input to the binop is a shift by a
+  // constant or is a copy/select. Enable this in other cases once we figure
+  // out when exactly it is profitable.
+ SDValue BinOpLHSVal = LHS.getOperand(0);
+ bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
+ BinOpLHSVal.getOpcode() == ISD::SRA ||
+ BinOpLHSVal.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
+ bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
+ BinOpLHSVal.getOpcode() == ISD::SELECT;
+
+ if (!IsShiftByConstant && !IsCopyOrSelect)
+ return SDValue();
+
+ if (IsCopyOrSelect && N->hasOneUse())
+ return SDValue();
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
+ N->getOperand(1));
+ assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
+
+ SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
+ N->getOperand(1));
+ return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
+ assert(N->getOpcode() == ISD::TRUNCATE);
+ assert(N->getOperand(0).getOpcode() == ISD::AND);
+
+ // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
+ EVT TruncVT = N->getValueType(0);
+ if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
+ TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
+ SDValue N01 = N->getOperand(0).getOperand(1);
+ if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
+ SDLoc DL(N);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
+ SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
+ AddToWorklist(Trunc00.getNode());
+ AddToWorklist(Trunc01.getNode());
+ return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitRotate(SDNode *N) {
+ SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ unsigned Bitsize = VT.getScalarSizeInBits();
+
+ // fold (rot x, 0) -> x
+ if (isNullOrNullSplat(N1))
+ return N0;
+
+ // fold (rot x, c) -> x iff (c % BitSize) == 0
+ if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
+ APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
+ if (DAG.MaskedValueIsZero(N1, ModuloMask))
+ return N0;
+ }
+
+ // fold (rot x, c) -> (rot x, c % BitSize)
+ // TODO - support non-uniform vector amounts.
+ if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
+ if (Cst->getAPIntValue().uge(Bitsize)) {
+ uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
+ return DAG.getNode(N->getOpcode(), dl, VT, N0,
+ DAG.getConstant(RotAmt, dl, N1.getValueType()));
+ }
+ }
+
+ // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
+ }
+
+ unsigned NextOp = N0.getOpcode();
+  // fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
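+  // e.g. (illustrative): (rotl (rotl x, 5), 7) -> (rotl x, 12 % bitsize), and
+  //                      (rotl (rotr x, 5), 7) -> (rotl x, 2 % bitsize)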
+ if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
+ SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
+ SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
+ if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
+ EVT ShiftVT = C1->getValueType(0);
+ bool SameSide = (N->getOpcode() == NextOp);
+ unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
+ if (SDValue CombinedShift =
+ DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
+ SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
+ SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+ ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
+ BitsizeC.getNode());
+ return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
+ CombinedShiftNorm);
+ }
+ }
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
+ EVT VT = N0.getValueType();
+ EVT ShiftVT = N1.getValueType();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
+    // If setcc produces an all-ones true value then:
+    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
+ if (N1CV && N1CV->isConstant()) {
+ if (N0.getOpcode() == ISD::AND) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
+
+ if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
+ TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
+ N01CV, N1CV))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
+ }
+ }
+ }
+ }
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // fold (shl c1, c2) -> c1<<c2
+ // TODO - support non-uniform vector shift amounts.
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ // TODO - support non-uniform vector shift amounts.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
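+  // e.g. (illustrative) on i8: (shl (shl x, 3), 2) -> (shl x, 5), while
+  //                            (shl (shl x, 5), 4) -> 0 since 5 + 4 >= 8.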
+ if (N0.getOpcode() == ISD::SHL) {
+ auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).uge(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).ult(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ SDLoc DL(N);
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
+ }
+ }
+
+ // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
+  // For this to be valid, the second form must not preserve any of the bits
+  // that are shifted out by the inner shift in the first form. This means
+  // the outer shift amount must be >= the number of bits added by the ext.
+  // As a corollary, we don't care what kind of ext it is.
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+ EVT InnerVT = N0Op0.getValueType();
+ uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
+
+ auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return c2.uge(OpSizeInBits - InnerBitwidth) &&
+ (c1 + c2).uge(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return c2.uge(OpSizeInBits - InnerBitwidth) &&
+ (c1 + c2).ult(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
+ SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
+ Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
+ return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
+ }
+ }
+
+ // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
+ // Only fold this if the inner zext has no other uses to avoid increasing
+ // the total number of instructions.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+
+ auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2);
+ return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
+ NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
+ AddToWorklist(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ }
+ }
+
+ // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
+ // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
+ // TODO - support non-uniform vector shift amounts.
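+  // e.g. (illustrative): (shl (srl exact X, 3), 5) -> (shl X, 2) and
+  //                      (shl (sra exact X, 5), 3) -> (sra X, 2)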
+ if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
+ N0->getFlags().hasExact()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t C1 = N0C1->getZExtValue();
+ uint64_t C2 = N1C->getZExtValue();
+ SDLoc DL(N);
+ if (C1 <= C2)
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(C2 - C1, DL, ShiftVT));
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
+ DAG.getConstant(C1 - C2, DL, ShiftVT));
+ }
+ }
+
+  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+  //                               (and (srl x, (sub c1, c2)), MASK)
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
+ // TODO - drop hasOneUse requirement if c1 == c2?
+ // TODO - support non-uniform vector shift amounts.
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
+ uint64_t c1 = N0C1->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask <<= c2 - c1;
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c2 - c1, DL, ShiftVT));
+ } else {
+ Mask.lshrInPlace(c1 - c2);
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 - c2, DL, ShiftVT));
+ }
+ SDLoc DL(N0);
+ return DAG.getNode(ISD::AND, DL, VT, Shift,
+ DAG.getConstant(Mask, DL, VT));
+ }
+ }
+ }
+
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
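+  // e.g. (illustrative) on i8: (shl (sra x, 4), 4) -> (and x, 0xF0)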
+ if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true)) {
+ SDLoc DL(N);
+ SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
+ SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
+ }
+
+ // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+  // Variant of the version done on multiply, except mul by a power of 2 is
+  // turned into a shift.
+ if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
+ N0.getNode()->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
+ SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+ SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ AddToWorklist(Shl0.getNode());
+ AddToWorklist(Shl1.getNode());
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
+ }
+
+ // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
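+  // e.g. (illustrative): (shl (mul x, 5), 2) -> (mul x, 20)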
+ if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ if (isConstantOrConstantVector(Shl))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
+ }
+
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSHL = visitShiftByConstant(N))
+ return NewSHL;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ // fold (sra 0, x) -> 0
+ // fold (sra -1, x) -> -1
+ if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
+ return N0;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+  // fold (sra c1, c2) -> c1 >>s c2
+ // TODO - support non-uniform vector shift amounts.
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1, provided the target
+  // supports sext_inreg.
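+  // e.g. (illustrative) on i32: (sra (shl x, 24), 24) -> (sext_inreg x, i8)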
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
+ N0.getOperand(0), DAG.getValueType(ExtVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ // clamp (add c1, c2) to max shift.
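+  // e.g. (illustrative) on i8: (sra (sra x, 3), 2) -> (sra x, 5), while
+  //      (sra (sra x, 6), 7) -> (sra x, 7) after clamping to the max shift.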
+ if (N0.getOpcode() == ISD::SRA) {
+ SDLoc DL(N);
+ EVT ShiftVT = N1.getValueType();
+ EVT ShiftSVT = ShiftVT.getScalarType();
+ SmallVector<SDValue, 16> ShiftValues;
+
+ auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ APInt Sum = c1 + c2;
+ unsigned ShiftSum =
+ Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
+ ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
+ return true;
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
+ SDValue ShiftValue;
+ if (VT.isVector())
+ ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
+ else
+ ShiftValue = ShiftValues[0];
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
+ }
+ }
+
+ // fold (sra (shl X, m), (sub result_size, n))
+ // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
+ // result_size - n != m.
+  // If truncate is free for the target, sext(shl) is likely to result in
+  // better code.
+ if (N0.getOpcode() == ISD::SHL && N1C) {
+    // Get the two constants of the shifts, CN0 = m, CN = n.
+ const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
+ if (N01C) {
+ LLVMContext &Ctx = *DAG.getContext();
+ // Determine what the truncate's result bitsize and type would be.
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
+
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
+ // Determine the residual right-shift amount.
+ int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+      // If the shift is not a no-op (in which case this should be just a sign
+      // extend already), the truncate-to type is legal, sign_extend is legal
+      // on that type, and the truncate to that type is both legal and free,
+      // perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Amt = DAG.getConstant(ShiftAmt, DL,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL,
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+ // sra (add (shl X, N1C), AddC), N1C -->
+ // sext (add (trunc X to (width - N1C)), AddC')
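+  // e.g. (illustrative) on i32 with N1C == 16:
+  //   sra (add (shl X, 16), 0xA0000), 16
+  //     --> sext (add (trunc X to i16), 0xA)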
+ if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+ if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+ SDValue Shl = N0.getOperand(0);
+ // Determine what the truncate's type would be and ask the target if that
+ // is a free operation.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned ShiftAmt = N1C->getZExtValue();
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
+ // TODO: The simple type check probably belongs in the default hook
+ // implementation and/or target-specific overrides (because
+ // non-simple types likely require masking when legalized), but that
+ // restriction may conflict with other transforms.
+ if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+ SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+ trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+ return DAG.getSExtOrTrunc(Add, DL, VT);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
+ // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
+ // if c1 is equal to the number of bits the trunc removes
+ // TODO - support non-uniform vector shift amounts.
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ EVT LargeVT = N0Op0.getValueType();
+ unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
+ if (LargeShift->getAPIntValue() == TruncBits) {
+ SDLoc DL(N);
+ SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
+ getShiftAmountTy(LargeVT));
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
+ }
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ // TODO - support non-uniform vector shift amounts.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
+
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRA = visitShiftByConstant(N))
+ return NewSRA;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ // TODO - support non-uniform vector shift amounts.
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
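+  // e.g. (illustrative) on i8: (srl (srl x, 2), 3) -> (srl x, 5), while
+  //                            (srl (srl x, 5), 4) -> 0 since 5 + 4 >= 8.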
+ if (N0.getOpcode() == ISD::SRL) {
+ auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).uge(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).ult(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ SDLoc DL(N);
+ EVT ShiftVT = N1.getValueType();
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
+ }
+ }
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ // TODO - support non-uniform vector shift amounts.
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
+ uint64_t c1 = N001C->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+      // This is only valid if OpSizeInBits + c1 == the size of the inner
+      // shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ SDLoc DL(N0);
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ N0.getOperand(0).getOperand(0),
+ DAG.getConstant(c1 + c2, DL,
+ ShiftCountVT)));
+ }
+ }
+ }
+
+ // fold (srl (shl x, c), c) -> (and x, cst2)
+ // TODO - (srl (shl x, c1), c2).
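+  // e.g. (illustrative) on i8: (srl (shl x, 3), 3) -> (and x, 0x1F)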
+ if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
+ SDLoc DL(N);
+ SDValue Mask =
+ DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
+ AddToWorklist(Mask.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
+ }
+
+ // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
+ // TODO - support non-uniform vector shift amounts.
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ unsigned BitSize = SmallVT.getScalarSizeInBits();
+ if (N1C->getAPIntValue().uge(BitSize))
+ return DAG.getUNDEF(VT);
+
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
+ SDLoc DL0(N0);
+ SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, DL0,
+ getShiftAmountTy(SmallVT)));
+ AddToWorklist(SmallShift.getNode());
+ APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
+ DAG.getConstant(Mask, DL, VT));
+ }
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
+ KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
+
+    // If all of the bits input to the ctlz node are known to be zero, then
+    // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~Known.Zero;
+ if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if (UnknownBits.isPowerOf2()) {
+      // Okay, we know that only the single bit specified by UnknownBits
+      // could be set on input to the CTLZ node. If this bit is set, the SRL
+      // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL
+      // pair to an SRL/XOR pair, which is likely to simplify more.
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ SDLoc DL(N0);
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(ShAmt, DL,
+ getShiftAmountTy(Op.getValueType())));
+ AddToWorklist(Op.getNode());
+ }
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT,
+ Op, DAG.getConstant(1, DL, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ // TODO - support non-uniform vector shift amounts.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRL = visitShiftByConstant(N))
+ return NewSRL;
+
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
+ return NarrowLoad;
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However, after the source operand of the SRL has been optimized into AND,
+ // the SRL itself may not be optimized further. Look for it and add the
+ // BRCOND to the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorklist(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look past the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorklist(Use);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ bool IsFSHL = N->getOpcode() == ISD::FSHL;
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ // fold (fshl N0, N1, 0) -> N0
+ // fold (fshr N0, N1, 0) -> N1
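+ // The shift amount acts modulo the bit width, so when the bit width is a
+ // power of 2, an amount whose low log2(BitWidth) bits are known zero must
+ // be a multiple of BitWidth, making the funnel shift a no-op.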
+ if (isPowerOf2_32(BitWidth))
+ if (DAG.MaskedValueIsZero(
+ N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
+ return IsFSHL ? N0 : N1;
+
+ auto IsUndefOrZero = [](SDValue V) {
+ return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
+ };
+
+ // TODO - support non-uniform vector shift amounts.
+ if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ EVT ShAmtTy = N2.getValueType();
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ if (Cst->getAPIntValue().uge(BitWidth)) {
+ uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
+ DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
+ }
+
+ unsigned ShAmt = Cst->getZExtValue();
+ if (ShAmt == 0)
+ return IsFSHL ? N0 : N1;
+
+ // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+ // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+ // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+ // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
+ if (IsUndefOrZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+ DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+ SDLoc(N), ShAmtTy));
+ if (IsUndefOrZero(N1))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+ DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+ SDLoc(N), ShAmtTy));
+ }
+
+ // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+ // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+ // iff we know the shift amount is in range (i.e. less than the bit width).
+ // TODO: when is it worth doing SUB(BW, N2) as well?
+ if (isPowerOf2_32(BitWidth)) {
+ APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+ if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
+ }
+
+ // fold (fshl N0, N0, N2) -> (rotl N0, N2)
+ // fold (fshr N0, N0, N2) -> (rotr N0, N2)
+ // TODO: Investigate flipping this rotate if only one is legal; if the funnel
+ // shift is legal as well, we might be better off avoiding the non-constant
+ // (BW - N2).
+ unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (N0 == N1 && hasOperation(RotOpc, VT))
+ return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
+
+ // Simplify, based on bits shifted out of N0/N1.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (abs c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+ // fold (abs (abs x)) -> (abs x)
+ if (N0.getOpcode() == ISD::ABS)
+ return N0;
+ // fold (abs x) -> x iff not-negative
+ if (DAG.SignBitIsZero(N0))
+ return N0;
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBSWAP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (bswap c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
+ // fold (bswap (bswap x)) -> x
+ if (N0.getOpcode() == ISD::BSWAP)
+ return N0->getOperand(0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (bitreverse c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
+ // fold (bitreverse (bitreverse x)) -> x
+ if (N0.getOpcode() == ISD::BITREVERSE)
+ return N0.getOperand(0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
+
+ // If the value is known never to be zero, switch to the undef version.
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ if (DAG.isKnownNeverZero(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
+
+ // If the value is known never to be zero, switch to the undef version.
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ if (DAG.isKnownNeverZero(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+// FIXME: This should be checking for no signed zeros on individual operands, as
+// well as no nans.
+static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
+ SDValue RHS,
+ const TargetLowering &TLI) {
+ const TargetOptions &Options = DAG.getTarget().Options;
+ EVT VT = LHS.getValueType();
+
+ return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ TLI.isProfitableToCombineMinNumMaxNum(VT) &&
+ DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
+}
+
+/// Generate Min/Max node
+static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ ISD::CondCode CC, const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
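+ // Check legality of the non-IEEE opcodes on the type VT will be transformed
+ // to, so the fold still fires when FMINNUM/FMAXNUM only become legal after
+ // type legalization.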
+ EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ switch (CC) {
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ // Since the operands are known never to be NaN by the time we get here,
+ // either fminnum or fminnum_ieee is OK. Try the IEEE version first, since
+ // fminnum is expanded in terms of it.
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
+ unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: {
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
+ unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ default:
+ return SDValue();
+ }
+}
+
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+ SDValue Cond = N->getOperand(0);
+ SDValue C1 = N->getOperand(1);
+ SDValue C2 = N->getOperand(2);
+ assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+ "Expected select-of-constants");
+
+ EVT VT = N->getValueType(0);
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+ VT != Cond.getOperand(0).getValueType())
+ return SDValue();
+
+ // The inverted-condition + commuted-select variants of these patterns are
+ // canonicalized to these forms in IR.
+ SDValue X = Cond.getOperand(0);
+ SDValue CondC = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+ isAllOnesOrAllOnesSplat(C2)) {
+ // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+ }
+ if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+ // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT CondVT = Cond.getValueType();
+ SDLoc DL(N);
+
+ if (!VT.isInteger())
+ return SDValue();
+
+ auto *C1 = dyn_cast<ConstantSDNode>(N1);
+ auto *C2 = dyn_cast<ConstantSDNode>(N2);
+ if (!C1 || !C2)
+ return SDValue();
+
+ // Only do this before legalization to avoid conflicting with target-specific
+ // transforms in the other direction (create a select from a zext/sext). There
+ // is also a target-independent combine here in DAGCombiner in the other
+ // direction for (select Cond, -1, 0) when the condition is not i1.
+ if (CondVT == MVT::i1 && !LegalOperations) {
+ if (C1->isNullValue() && C2->isOne()) {
+ // select Cond, 0, 1 --> zext (!Cond)
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ if (VT != MVT::i1)
+ NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
+ return NotCond;
+ }
+ if (C1->isNullValue() && C2->isAllOnesValue()) {
+ // select Cond, 0, -1 --> sext (!Cond)
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ if (VT != MVT::i1)
+ NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
+ return NotCond;
+ }
+ if (C1->isOne() && C2->isNullValue()) {
+ // select Cond, 1, 0 --> zext (Cond)
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ return Cond;
+ }
+ if (C1->isAllOnesValue() && C2->isNullValue()) {
+ // select Cond, -1, 0 --> sext (Cond)
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ return Cond;
+ }
+
+ // Use a target hook because some targets may prefer to transform in the
+ // other direction.
+ if (TLI.convertSelectOfConstantsToMath(VT)) {
+ // For any constants that differ by 1, we can transform the select into an
+ // extend and add.
+ const APInt &C1Val = C1->getAPIntValue();
+ const APInt &C2Val = C2->getAPIntValue();
+ if (C1Val - 1 == C2Val) {
+ // select Cond, C1, C1-1 --> add (zext Cond), C1-1
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+ if (C1Val + 1 == C2Val) {
+ // select Cond, C1, C1+1 --> add (sext Cond), C1+1
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+
+ // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
+ if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
+ }
+
+ return SDValue();
+ }
+
+ // fold (select Cond, 0, 1) -> (xor Cond, 1)
+ // We can't do this reliably if integer-based booleans have different contents
+ // from floating-point-based booleans. This is because we can't tell whether we
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
+ if (CondVT.isInteger() &&
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ C1->isNullValue() && C2->isOne()) {
+ SDValue NotCond =
+ DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
+ if (VT.bitsEq(CondVT))
+ return NotCond;
+ return DAG.getZExtOrTrunc(NotCond, DL, VT);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+ SDLoc DL(N);
+ SDNodeFlags Flags = N->getFlags();
+
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
+
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or C, Y)
+ if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N2);
+
+ if (SDValue V = foldSelectOfConstants(N))
+ return V;
+
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
+ SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
+ AddToWorklist(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
+ SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
+ AddToWorklist(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
+ }
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
+ return DAG.getNode(ISD::AND, DL, VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ if (VT0 == MVT::i1) {
+ // The code in this block deals with the following 2 equivalences:
+ // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
+ // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
+ // The target can specify its preferred form with the
+ // shouldNormalizeToSelectSequence() callback. However, we always transform
+ // to the right-hand form if the inner select already exists in the DAG,
+ // and we always transform to the left-hand form if we know that we can
+ // further optimize the combination of the conditions.
+ bool normalizeToSequence =
+ TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect =
+ DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
+ if (normalizeToSequence || !InnerSelect.use_empty())
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
+ InnerSelect, N2, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
+ Cond1, N1, N2, Flags);
+ if (normalizeToSequence || !InnerSelect.use_empty())
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
+ InnerSelect, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
+ }
+
+ // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+ if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
+ SDValue N1_0 = N1->getOperand(0);
+ SDValue N1_1 = N1->getOperand(1);
+ SDValue N1_2 = N1->getOperand(2);
+ if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
+ // Create the actual and node if we can generate good code for it.
+ if (!normalizeToSequence) {
+ SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
+ N2, Flags);
+ }
+ // Otherwise see if we can optimize the "and" to a better pattern.
+ if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
+ N2, Flags);
+ }
+ }
+ }
+ // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+ if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
+ SDValue N2_0 = N2->getOperand(0);
+ SDValue N2_1 = N2->getOperand(1);
+ SDValue N2_2 = N2->getOperand(2);
+ if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
+ // Create the actual or node if we can generate good code for it.
+ if (!normalizeToSequence) {
+ SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
+ N2_2, Flags);
+ }
+ // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N))
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
+ N2_2, Flags);
+ }
+ }
+ }
+
+ // select (not Cond), N1, N2 -> select Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
+ SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
+ SelectOp->setFlags(Flags);
+ return SelectOp;
+ }
+
+ // Fold selects based on a setcc into other things, such as min/max/abs.
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+
+ // select (fcmp lt x, y), x, y -> fminnum x, y
+ // select (fcmp gt x, y), x, y -> fmaxnum x, y
+ //
+ // This is OK if we don't care what happens if either operand is a NaN.
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
+ if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
+ CC, TLI, DAG))
+ return FMinMax;
+
+ // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
+ // This is conservatively limited to pre-legal-operations to give targets
+ // a chance to reverse the transform if they want to do that. Also, it is
+ // unlikely that the pattern would be formed late, so it's probably not
+ // worth going through the other checks.
+ if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
+ CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
+ N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
+ auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
+ auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
+ if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
+ // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
+ // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
+ //
+ // The IR equivalent of this transform would have this form:
+ // %a = add %x, C
+ // %c = icmp ugt %x, ~C
+ // %r = select %c, -1, %a
+ // =>
+ // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
+ // %u0 = extractvalue %u, 0
+ // %u1 = extractvalue %u, 1
+ // %r = select %u1, -1, %u0
+ SDVTList VTs = DAG.getVTList(VT, VT0);
+ SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
+ return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
+ }
+ }
+
+ if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
+ (!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
+ // Any flags available in a select/setcc fold will be on the setcc as they
+ // migrated from fcmp
+ Flags = N0.getNode()->getFlags();
+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
+ N2, N0.getOperand(2));
+ SelectNode->setFlags(Flags);
+ return SelectNode;
+ }
+
+ return SimplifySelect(DL, N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
+// This function assumes all the vselect's arguments are CONCAT_VECTORS
+// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ RHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ Cond.getOpcode() == ISD::BUILD_VECTOR);
+
+ // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
+ // about binary ones here.
+ if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
+ return SDValue();
+
+ // We're sure we have an even number of elements due to the
+ // concat_vectors we have as arguments to vselect.
+ // Scan the first half of the BV, skipping UNDEF elements, and check that
+ // all the non-undef elements are the same node.
+ ConstantSDNode *BottomHalf = nullptr;
+ for (int i = 0; i < NumElems / 2; ++i) {
+ if (Cond->getOperand(i)->isUndef())
+ continue;
+
+ if (BottomHalf == nullptr)
+ BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != BottomHalf)
+ return SDValue();
+ }
+
+ // Do the same for the second half of the BuildVector
+ ConstantSDNode *TopHalf = nullptr;
+ for (int i = NumElems / 2; i < NumElems; ++i) {
+ if (Cond->getOperand(i)->isUndef())
+ continue;
+
+ if (TopHalf == nullptr)
+ TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != TopHalf)
+ return SDValue();
+ }
+
+ assert(TopHalf && BottomHalf &&
+ "One half of the selector was all UNDEFs and the other was all the "
+ "same value. This should have been addressed before this function.");
+ return DAG.getNode(
+ ISD::CONCAT_VECTORS, DL, VT,
+ BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+}
+
+SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue Mask = MSC->getMask();
+ SDValue Chain = MSC->getChain();
+ SDLoc DL(N);
+
+ // Zap scatters with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMSTORE(SDNode *N) {
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ SDValue Chain = MST->getChain();
+ SDLoc DL(N);
+
+ // Zap masked stores with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMGATHER(SDNode *N) {
+ MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
+ SDValue Mask = MGT->getMask();
+ SDLoc DL(N);
+
+ // Zap gathers with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return CombineTo(N, MGT->getPassThru(), MGT->getChain());
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMLOAD(SDNode *N) {
+ MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
+ SDValue Mask = MLD->getMask();
+ SDLoc DL(N);
+
+ // Zap masked loads with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+
+ return SDValue();
+}
+
+/// A vector select of 2 constant vectors can be simplified to math/logic to
+/// avoid a variable select instruction and possibly avoid constant loads.
+SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
+ !TLI.convertSelectOfConstantsToMath(VT) ||
+ !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
+ !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
+ return SDValue();
+
+ // Check if we can use the condition value to increment/decrement a single
+ // constant value. This simplifies a select to an add and removes a constant
+ // load/materialization from the general case.
+ bool AllAddOne = true;
+ bool AllSubOne = true;
+ unsigned Elts = VT.getVectorNumElements();
+ for (unsigned i = 0; i != Elts; ++i) {
+ SDValue N1Elt = N1.getOperand(i);
+ SDValue N2Elt = N2.getOperand(i);
+ if (N1Elt.isUndef() || N2Elt.isUndef())
+ continue;
+
+ const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
+ const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
+ if (C1 != C2 + 1)
+ AllAddOne = false;
+ if (C1 != C2 - 1)
+ AllSubOne = false;
+ }
+
+ // Further simplifications for the extra-special cases where the constants are
+ // all 0 or all -1 should be implemented as folds of these patterns.
+ SDLoc DL(N);
+ if (AllAddOne || AllSubOne) {
+ // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
+ // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
+ auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+ SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
+ }
+
+ // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
+ APInt Pow2C;
+ if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
+ isNullOrNullSplat(N2)) {
+ SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
+
+ // The general case for select-of-constants:
+ // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
+ // ...but that only makes sense if a vselect is slower than 2 logic ops, so
+ // leave that to a machine-specific pass.
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
+
+ // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
+ return DAG.getSelect(DL, VT, F, N2, N1);
+
+ // Canonicalize integer abs.
+ // vselect (setg[te] X, 0), X, -X ->
+ // vselect (setgt X, -1), X, -X ->
+ // vselect (setl[te] X, 0), -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
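+ // This works because Y is 0 when X is non-negative and -1 when X is
+ // negative, so the add/xor pair either leaves X unchanged or computes
+ // (X - 1) ^ -1 == -X.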
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ bool isAbs = false;
+ bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
+ N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
+ else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
+ N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+
+ if (isAbs) {
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
+ return DAG.getNode(ISD::ABS, DL, VT, LHS);
+
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ DL, getShiftAmountTy(VT)));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
+ AddToWorklist(Shift.getNode());
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
+ }
+
+ // vselect x, y (fcmp lt x, y) -> fminnum x, y
+ // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
+ //
+ // This is OK if we don't care about what happens if either operand is a
+ // NaN.
+ //
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
+ if (SDValue FMinMax =
+ combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
+ return FMinMax;
+ }
+
+ // If this select has a condition (setcc) with narrower operands than the
+ // select, try to widen the compare to match the select width.
+ // TODO: This should be extended to handle any constant.
+ // TODO: This could be extended to handle non-loading patterns, but that
+ // requires thorough testing to avoid regressions.
+ if (isNullOrNullSplat(RHS)) {
+ EVT NarrowVT = LHS.getValueType();
+ EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
+ EVT SetCCVT = getSetCCResultType(LHS.getValueType());
+ unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
+ unsigned WideWidth = WideVT.getScalarSizeInBits();
+ bool IsSigned = isSignedIntSetCC(CC);
+ auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
+ SetCCWidth != 1 && SetCCWidth < WideWidth &&
+ TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
+ // Both compare operands can be widened for free. The LHS can use an
+ // extended load, and the RHS is a constant:
+ // vselect (ext (setcc load(X), C)), N1, N2 -->
+ // vselect (setcc extload(X), C'), N1, N2
+ auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
+ SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
+ EVT WideSetCCVT = getSetCCResultType(WideVT);
+ SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
+ return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
+ }
+ }
+ }
+
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // Fold (vselect (build_vector all_ones), N1, N2) -> N1
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N2;
+
+ // The ConvertSelectToConcatVector function assumes both the above
+ // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
+ // and addressed.
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N2.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
+ return CV;
+ }
+
+ if (SDValue V = foldVSelectOfConstants(N))
+ return V;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
+ CC, SDLoc(N), false)) {
+ AddToWorklist(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ } else if (SCC->isUndef()) {
+ // When the condition is UNDEF, just return the first operand. This is
+ // consistent with DAG creation: no setcc node is created in this case.
+ return N2;
+ } else if (SCC.getOpcode() == ISD::SETCC) {
+ // Fold to a simpler select_cc
+ SDValue SelectOp = DAG.getNode(
+ ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
+ SCC.getOperand(1), N2, N3, SCC.getOperand(2));
+ SelectOp->setFlags(SCC->getFlags());
+ return SelectOp;
+ }
+ }
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ // setcc is very commonly used as an argument to brcond. This pattern
+ // also lends itself to numerous combines and, as a result, it is desirable
+ // to keep the argument to a brcond as a setcc for as long as possible.
+ bool PreferSetCC =
+ N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
+
+ SDValue Combined = SimplifySetCC(
+ N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
+
+ if (!Combined)
+ return SDValue();
+
+ // If we prefer to have a setcc, and we don't, we'll try our best to
+ // recreate one using rebuildSetCC.
+ if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+ SDValue NewSetCC = rebuildSetCC(Combined);
+
+ // We don't have anything interesting to combine to.
+ if (NewSetCC.getNode() == N)
+ return SDValue();
+
+ if (NewSetCC)
+ return NewSetCC;
+ }
+
+ return Combined;
+}
+
+SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+
+ // If Carry is false, fold to a regular SETCC.
+ if (isNullConstant(Carry))
+ return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+
+ return SDValue();
+}
+
+/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
+/// a build_vector of constants.
+/// This function is called by the DAGCombiner when visiting sext/zext/aext
+/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+/// Vector extends are not folded if operations are legal; this is to
+/// avoid introducing illegal build_vector dag nodes.
+static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG, bool LegalTypes) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
+ Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
+ && "Expected EXTEND dag node in input!");
+
+ // fold (sext c1) -> c1
+ // fold (zext c1) -> c1
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(Opcode, DL, VT, N0);
+
+ // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
+ // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ if (N0->getOpcode() == ISD::SELECT) {
+ SDValue Op1 = N0->getOperand(1);
+ SDValue Op2 = N0->getOperand(2);
+ if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
+ (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
+ // For any_extend, choose sign extension of the constants to allow a
+ // possible further transform to sign_extend_inreg, i.e.:
+ //
+ // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
+ // t2: i64 = any_extend t1
+ // -->
+ // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
+ // -->
+ // t4: i64 = sign_extend_inreg t3
+ unsigned FoldOpc = Opcode;
+ if (FoldOpc == ISD::ANY_EXTEND)
+ FoldOpc = ISD::SIGN_EXTEND;
+ return DAG.getSelect(DL, VT, N0->getOperand(0),
+ DAG.getNode(FoldOpc, DL, VT, Op1),
+ DAG.getNode(FoldOpc, DL, VT, Op2));
+ }
+ }
+
+ // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
+ // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
+ // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
+ EVT SVT = VT.getScalarType();
+ if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
+ return SDValue();
+
+ // We can fold this node into a build_vector.
+ unsigned VTBits = SVT.getSizeInBits();
+ unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
+ SmallVector<SDValue, 8> Elts;
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // For zero-extensions, UNDEF elements are still guaranteed to have their
+ // upper bits set to zero.
+ bool IsZext =
+ Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.isUndef()) {
+ Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
+ continue;
+ }
+
+ SDLoc DL(Op);
+ // Get the constant value and if needed trunc it to the size of the type.
+ // Nodes like build_vector might have constants wider than the scalar type.
+ APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
+ if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
+ Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
+ else
+ Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
+ }
+
+ return DAG.getBuildVector(VT, DL, Elts);
+}
+
+// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if the extensions are possible and the
+// above-mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVectorImpl<SDNode *> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+ // Both unextended and extended values are live out. There had better be
+ // a good reason for the transformation: it is only worthwhile if we will
+ // also be able to extend some setcc users.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
+ SDValue OrigLoad, SDValue ExtLoad,
+ ISD::NodeType ExtType) {
+ // Extend SetCC uses if necessary.
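+ // Rebuild each setcc that compared the original narrow load so that it
+ // compares the extended load instead, with its other (constant) operand
+ // extended the same way.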
+ SDLoc DL(ExtLoad);
+ for (SDNode *SetCC : SetCCs) {
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == OrigLoad)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
+ }
+}
+
+// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
+SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) &&
+ "Unexpected node type (not an extend)!");
+
+ // fold (sext (load x)) to multiple smaller sextloads; same for zext.
+ // For example, on a target with legal v4i32, but illegal v8i32, turn:
+ // (v8i32 (sext (v8i16 (load x))))
+ // into:
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))
+ // Where uses of the original load, i.e.:
+ // (v8i16 (load x))
+ // are replaced with:
+ // (v8i16 (truncate
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))))
+ //
+ // This combine is only applicable to illegal, but splittable, vectors.
+ // All legal types, and illegal non-vector types, are handled elsewhere.
+ // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
+ //
+ if (N0->getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
+ !N0.hasOneUse() || !LN0->isSimple() ||
+ !DstVT.isVector() || !DstVT.isPow2VectorType() ||
+ !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
+ return SDValue();
+
+ ISD::LoadExtType ExtType =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+
+ // Try to split the vector types to get down to legal types.
+ EVT SplitSrcVT = SrcVT;
+ EVT SplitDstVT = DstVT;
+ while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
+ SplitSrcVT.getVectorNumElements() > 1) {
+ SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
+ SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
+ }
+
+ if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
+ return SDValue();
+
+ SDLoc DL(N);
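+ // Number of pieces the load is split into, and the byte stride between the
+ // base addresses of consecutive piece loads.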
+ const unsigned NumSplits =
+ DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
+ const unsigned Stride = SplitSrcVT.getStoreSize();
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> Chains;
+
+ SDValue BasePtr = LN0->getBasePtr();
+ for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
+ const unsigned Offset = Idx * Stride;
+ const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
+
+ SDValue SplitLoad = DAG.getExtLoad(
+ ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
+ LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, DL, BasePtr.getValueType()));
+
+ Loads.push_back(SplitLoad.getValue(0));
+ Chains.push_back(SplitLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+
+ // Simplify TF.
+ AddToWorklist(NewChain.getNode());
+
+ CombineTo(N, NewValue);
+
+ // Replace uses of the original load (before extension)
+ // with a truncate of the concatenated sextloaded vectors.
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
+ ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
+ CombineTo(N0.getNode(), Trunc, NewChain);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
+ assert(N->getOpcode() == ISD::ZERO_EXTEND);
+ EVT VT = N->getValueType(0);
+ EVT OrigVT = N->getOperand(0).getValueType();
+ if (TLI.isZExtFree(OrigVT, VT))
+ return SDValue();
+
+ // and/or/xor
+ SDValue N0 = N->getOperand(0);
+ if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) ||
+ N0.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
+ return SDValue();
+
+ // shl/shr
+ SDValue N1 = N0->getOperand(0);
+ if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
+ N1.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
+ return SDValue();
+
+ // load
+ if (!isa<LoadSDNode>(N1.getOperand(0)))
+ return SDValue();
+ LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
+ EVT MemVT = Load->getMemoryVT();
+ if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
+ Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
+ return SDValue();
+
+ // If the shift op is SHL, the logic op must be AND, otherwise the result
+ // will be wrong.
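+ // (A zext of the narrow shl zeroes every bit above the narrow width, while
+ // the shl of the zextload may set those bits; the zero-extended AND mask
+ // clears them again, but an OR/XOR would keep them.)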
+ if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI))
+ return SDValue();
+
+ // Actually do the transformation.
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
+ Load->getChain(), Load->getBasePtr(),
+ Load->getMemoryVT(), Load->getMemOperand());
+
+ SDLoc DL1(N1);
+ SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
+ N1.getOperand(1));
+
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDLoc DL0(N0);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
+ DAG.getConstant(Mask, DL0, VT));
+
+ ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
+ CombineTo(N, And);
+ if (SDValue(Load, 0).hasOneUse()) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
+ Load->getValueType(0), ExtLoad);
+ CombineTo(Load, Trunc, ExtLoad.getValue(1));
+ }
+
+ // N0 is dead at this point.
+ recursivelyDeleteUnusedNodes(N0.getNode());
+
+ return SDValue(N,0); // Return N so it doesn't get rechecked!
+}
+
+/// If we're narrowing or widening the result of a vector select and the final
+/// size is the same size as a setcc (compare) feeding the select, then try to
+/// apply the cast operation to the select's operands because matching vector
+/// sizes for a select condition and other operands should be more efficient.
+SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
+ unsigned CastOpcode = Cast->getOpcode();
+ assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
+ CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
+ CastOpcode == ISD::FP_ROUND) &&
+ "Unexpected opcode for vector select narrowing/widening");
+
+ // We only do this transform before legal ops because the pattern may be
+ // obfuscated by target-specific operations after legalization. Do not create
+ // an illegal select op, however, because that may be difficult to lower.
+ EVT VT = Cast->getValueType(0);
+ if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+
+ SDValue VSel = Cast->getOperand(0);
+ if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
+ VSel.getOperand(0).getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Does the setcc have the same vector size as the casted select?
+ SDValue SetCC = VSel.getOperand(0);
+ EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
+ if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
+ SDValue A = VSel.getOperand(1);
+ SDValue B = VSel.getOperand(2);
+ SDValue CastA, CastB;
+ SDLoc DL(Cast);
+ if (CastOpcode == ISD::FP_ROUND) {
+ // FP_ROUND (fptrunc) has an extra flag operand to pass along.
+ CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
+ CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
+ } else {
+ CastA = DAG.getNode(CastOpcode, DL, VT, A);
+ CastB = DAG.getNode(CastOpcode, DL, VT, B);
+ }
+ return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
+}
+
+// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
+ const TargetLowering &TLI, EVT VT,
+ bool LegalOperations, SDNode *N,
+ SDValue N0, ISD::LoadExtType ExtLoadType) {
+ SDNode *N0Node = N0.getNode();
+ bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
+ : ISD::isZEXTLoad(N0Node);
+ if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
+ !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
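+ // Bail out unless the extending load is legal for this memory type; before
+ // operation legalization we also allow simple (non-volatile, non-atomic)
+ // loads with non-vector result types even if the extload is not yet legal.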
+ if ((LegalOperations || !LN0->isSimple() ||
+ VT.isVector()) &&
+ !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
+ return SDValue();
+
+ SDValue ExtLoad =
+ DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
+ LN0->getBasePtr(), MemVT, LN0->getMemOperand());
+ Combiner.CombineTo(N, ExtLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ if (LN0->use_empty())
+ Combiner.recursivelyDeleteUnusedNodes(LN0);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+// Only generate vector extloads when 1) they're legal, and 2) they are
+// deemed desirable by the target.
+static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
+ const TargetLowering &TLI, EVT VT,
+ bool LegalOperations, SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType,
+ ISD::NodeType ExtOpc) {
+ if (!ISD::isNON_EXTLoad(N0.getNode()) ||
+ !ISD::isUNINDEXEDLoad(N0.getNode()) ||
+ ((LegalOperations || VT.isVector() ||
+ !cast<LoadSDNode>(N0)->isSimple()) &&
+ !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
+ return {};
+
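+ // If the load has other users, the fold is only performed when
+ // ExtendUsesToFormExtLoad decides that extending those users (or truncating
+ // back for them) is still profitable.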
+ bool DoXform = true;
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
+ if (!DoXform)
+ return {};
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ Combiner.CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ Combiner.recursivelyDeleteUnusedNodes(LN0);
+ } else {
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
+ Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
+ const TargetLowering &TLI, EVT VT,
+ SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType,
+ ISD::NodeType ExtOpc) {
+ if (!N0.hasOneUse())
+ return SDValue();
+
+ MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
+ if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+
+ if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
+ return SDValue();
+
+ if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
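+ // Fold to an extending masked load. The passthru value is extended to the
+ // wide type as well, so masked-off lanes produce the same (extended) value.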
+ SDLoc dl(Ld);
+ SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
+ SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(),
+ Ld->getBasePtr(), Ld->getMask(),
+ PassThru, Ld->getMemoryVT(),
+ Ld->getMemOperand(), ExtLoadType,
+ Ld->isExpandingLoad());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
+ return NewLoad;
+}
+
+static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
+ bool LegalOperations) {
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
+
+ SDValue SetCC = N->getOperand(0);
+ if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
+ !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
+ return SDValue();
+
+ SDValue X = SetCC.getOperand(0);
+ SDValue Ones = SetCC.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+ EVT XVT = X.getValueType();
+ // setge X, C is canonicalized to setgt, so we do not need to match that
+ // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
+ // not require the 'not' op.
+ if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
+ // Invert and smear/shift the sign bit:
+ // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
+ // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
+ SDLoc DL(N);
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
+ auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ SDNode *oye = N0.getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorklist(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getScalarValueSizeInBits();
+ unsigned MidBits = N0.getScalarValueSizeInBits();
+ unsigned DestBits = VT.getScalarSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already the value we need.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (OpBits < DestBits)
+ Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
+ else if (OpBits > DestBits)
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // Try to simplify (sext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::SEXTLOAD, ISD::SIGN_EXTEND))
+ return foldedExt;
+
+ if (SDValue foldedExt =
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
+ ISD::SIGN_EXTEND))
+ return foldedExt;
+
+ // fold (sext (load x)) to multiple smaller sextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
+ // Try to simplify (sext (sextload x)).
+ if (SDValue foldedExt = tryToFoldExtOfExtload(
+ DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
+ return foldedExt;
+
+ // fold (sext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (sextload x), (sext cst))
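+ // e.g. (sext (and (load i16 p), 3) to i32) -->
+ // (and (sextload i16 p to i32), 3)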
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
+ EVT MemVT = LN00->getMemoryVT();
+ if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
+ LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
+ SmallVector<SDNode*, 4> SetCCs;
+ bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
+ ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
+ LN00->getChain(), LN00->getBasePtr(),
+ LN00->getMemoryVT(),
+ LN00->getMemOperand());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.sext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
+ ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
+ bool NoReplaceTruncAnd = !N0.hasOneUse();
+ bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
+ CombineTo(N, And);
+ // If N0 has multiple uses, change other uses as well.
+ if (NoReplaceTruncAnd) {
+ SDValue TruncAnd =
+ DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+ CombineTo(N0.getNode(), TruncAnd);
+ }
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
+ LN00->getValueType(0), ExtLoad);
+ CombineTo(LN00, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N,0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+ return V;
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ EVT N00VT = N0.getOperand(0).getValueType();
+
+ // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations &&
+ TLI.getBooleanContents(N00VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // of the same size as the compared operands. Only optimize sext(setcc())
+ // if this is the case.
+ EVT SVT = getSetCCResultType(N00VT);
+
+ // If we already have the desired type, don't change it.
+ if (SVT != N0.getValueType()) {
+ // We know that the # elements of the result is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
+ return DAG.getSetCC(DL, VT, N00, N01, CC);
+
+ // If the desired elements are smaller or larger than the source
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVecType) {
+ SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
+ return DAG.getSExtOrTrunc(VsetCC, DL, VT);
+ }
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
+ // Here, T can be 1 or -1, depending on the type of the setcc and
+ // getBooleanContents().
+ unsigned SetCCWidth = N0.getScalarValueSizeInBits();
+
+ // To determine the "true" side of the select, we need to know the high bit
+ // of the value returned by the setcc if it evaluates to true.
+ // If the type of the setcc is i1, then the true case of the select is just
+ // sext(i1 1), that is, -1.
+ // If the type of the setcc is larger (say, i8) then the value of the high
+ // bit depends on getBooleanContents(), so ask TLI for a real "true" value
+ // of the appropriate width.
+ SDValue ExtTrueVal = (SetCCWidth == 1)
+ ? DAG.getAllOnesConstant(DL, VT)
+ : DAG.getBoolConstant(true, DL, VT, N00VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ if (SDValue SCC =
+ SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
+ return SCC;
+
+ if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
+ EVT SetCCVT = getSetCCResultType(N00VT);
+ // Don't do this transform for i1 because there's a select transform
+ // that would reverse it.
+ // TODO: We should not do this transform at all without a target hook
+ // because a sext is likely cheaper than a select?
+ if (SetCCVT.getScalarSizeInBits() != 1 &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
+ SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
+ return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
+ }
+ }
+ }
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ // Eliminate this sign extend by doing a negation in the destination type:
+ // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
+ isNullOrNullSplat(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
+ SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
+ }
+ // Eliminate this sign extend by doing a decrement in the destination type:
+ // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
+ if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
+ isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
+ SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+ }
+
+ return SDValue();
+}
+
+// isTruncateOf - If N is a truncate of some other value, return true and
+// record the value being truncated in Op and which of Op's bits are zero/one
+// in Known.
+// This function computes KnownBits to avoid a duplicated call to
+// computeKnownBits in the caller.
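+// A (setcc ne X, 0) where all bits of X other than bit 0 are known zero is
+// also treated as a truncate of X to i1.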
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ KnownBits &Known) {
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ Known = DAG.computeKnownBits(Op);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::SETCC ||
+ N.getValueType().getScalarType() != MVT::i1 ||
+ cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ if (isNullOrNullSplat(Op0))
+ Op = Op1;
+ else if (isNullOrNullSplat(Op1))
+ Op = Op0;
+ else
+ return false;
+
+ Known = DAG.computeKnownBits(Op);
+
+ return (Known.Zero | 1).isAllOnesValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate x)) -> (zext x) or
+ // (zext (truncate x)) -> (truncate x)
+ // This is valid when the truncated bits of x are already zero.
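+ // e.g. (zext (trunc i32 X to i16) to i32) --> X
+ // when the top 16 bits of X are known to be zero.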
+ SDValue Op;
+ KnownBits Known;
+ if (isTruncateOf(DAG, N0, Op, Known)) {
+ APInt TruncatedBits =
+ (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
+ APInt(Op.getScalarValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getScalarValueSizeInBits(),
+ N0.getScalarValueSizeInBits(),
+ std::min(Op.getScalarValueSizeInBits(),
+ VT.getScalarSizeInBits()));
+ if (TruncatedBits.isSubsetOf(Known.Zero))
+ return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
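+ // e.g. (zext (trunc i32 X to i8) to i32) --> (and X, 255)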
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ SDNode *oye = N0.getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorklist(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ EVT SrcVT = N0.getOperand(0).getValueType();
+ EVT MinVT = N0.getValueType();
+
+ // Try to mask before the extension to avoid having to generate a larger mask,
+ // possibly over several sub-vectors.
+ if (SrcVT.bitsLT(VT) && VT.isVector()) {
+ if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
+ SDValue Op = N0.getOperand(0);
+ Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ AddToWorklist(Op.getNode());
+ SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ // Transfer the debug info; the new node is equivalent to N0.
+ DAG.transferDbgValues(N0, ZExtOrTrunc);
+ return ZExtOrTrunc;
+ }
+ }
+
+ if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
+ SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+ AddToWorklist(Op.getNode());
+ SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ // We may safely transfer the debug info describing the truncate node over
+ // to the equivalent and operation.
+ DAG.transferDbgValues(N0, And);
+ return And;
+ }
+ }
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
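+ // e.g. (zext (and (trunc i64 X to i32), 7) to i64) --> (and X, 7)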
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
+ }
+
+ // Try to simplify (zext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
+ return foldedExt;
+
+ if (SDValue foldedExt =
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
+ ISD::ZERO_EXTEND))
+ return foldedExt;
+
+ // fold (zext (load x)) to multiple smaller zextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
+ // fold (zext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (zextload x), (zext cst))
+ // Unless (and (load x) cst) will match as a zextload already and has
+ // additional users.
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
+ EVT MemVT = LN00->getMemoryVT();
+ if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
+ LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::AND) {
+ auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
+ EVT LoadResultTy = AndC->getValueType(0);
+ EVT ExtVT;
+ if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
+ DoXform = false;
+ }
+ }
+ if (DoXform)
+ DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
+ LN00->getChain(), LN00->getBasePtr(),
+ LN00->getMemoryVT(),
+ LN00->getMemOperand());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
+ ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
+ bool NoReplaceTruncAnd = !N0.hasOneUse();
+ bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
+ CombineTo(N, And);
+ // If N0 has multiple uses, change other uses as well.
+ if (NoReplaceTruncAnd) {
+ SDValue TruncAnd =
+ DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+ CombineTo(N0.getNode(), TruncAnd);
+ }
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
+ LN00->getValueType(0), ExtLoad);
+ CombineTo(LN00, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N,0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+ // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+ if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
+ return ZExtLoad;
+
+ // Try to simplify (zext (zextload x)).
+ if (SDValue foldedExt = tryToFoldExtOfExtload(
+ DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
+ return foldedExt;
+
+ if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+ return V;
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // Only do this before legalize for now.
+ if (!LegalOperations && VT.isVector() &&
+ N0.getValueType().getVectorElementType() == MVT::i1) {
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (getSetCCResultType(N00VT) == N0.getValueType())
+ return SDValue();
+
+ // We know that the # elements of the result is the same as the #
+ // elements of the compare (and the # elements of the compare result for
+ // that matter). Check to see that they are the same size. If so, we know
+ // that the element size of the zext'd result matches the element size of
+ // the compare operands.
+ SDLoc DL(N);
+ SDValue VecOnes = DAG.getConstant(1, DL, VT);
+ if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
+ // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
+ SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), N0.getOperand(2));
+ return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
+ }
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
+ SDValue VsetCC =
+ DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1), N0.getOperand(2));
+ return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
+ VecOnes);
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
+ if (SDValue SCC = SimplifySelectCC(
+ DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ return SCC;
+ }
+
+ // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
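+ // e.g. with X: i8, this combines (zext (shl (zext X to i16), C) to i32)
+ // only when C <= 8, since a larger shift would discard set bits of X in
+ // the narrow type while the widened shift would keep them, changing the
+ // result.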
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
+ unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
+ InnerZExt.getOperand(0).getValueSizeInBits();
+ if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ SDNode *oye = N0.getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorklist(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
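+ // e.g. (aext (and (trunc i64 X to i32), 7) to i64) --> (and X, 7)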
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDLoc DL(N);
+ SDValue X = N0.getOperand(0).getOperand(0);
+ X = DAG.getAnyExtOrTrunc(X, DL, VT);
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ // None of the supported targets knows how to perform load and any_ext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
+ TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = N0.hasOneUse();
+ CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ recursivelyDeleteUnusedNodes(LN0);
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), ExtLoad);
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ ISD::LoadExtType ExtType = LN0->getExtensionType();
+ EVT MemVT = LN0->getMemoryVT();
+ if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ recursivelyDeleteUnusedNodes(LN0);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // For vectors:
+ // aext(setcc) -> vsetcc
+ // aext(setcc) -> truncate(vsetcc)
+ // aext(setcc) -> aext(vsetcc)
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (getSetCCResultType(N00VT) == N0.getValueType())
+ return SDValue();
+
+ // We know that the # elements of the result is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the extended result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N00VT.getSizeInBits())
+ return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/any extend
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
+ SDValue VsetCC =
+ DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
+ if (SDValue SCC = SimplifySelectCC(
+ DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ return SCC;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAssertExt(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT AssertVT = cast<VTSDNode>(N1)->getVT();
+
+ // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
+ if (N0.getOpcode() == Opcode &&
+ AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
+ return N0;
+
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == Opcode) {
+ // We have an assert, truncate, assert sandwich. Make one stronger assert
+ // by applying the smaller asserted type to the larger source value. This
+ // eliminates the later assert:
+ // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
+ // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
+ "Asserting zero/sign-extended bits to a type larger than the "
+ "truncated destination does not provide information");
+
+ SDLoc DL(N);
+ EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
+ SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
+ SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+ BigA.getOperand(0), MinAssertVTVal);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
+ }
+
+ // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
+ // than X, just move the AssertZext in front of the truncate and drop the
+ // AssertSExt.
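+ // e.g. (AssertZext (trunc (AssertSext X, i32) to i16), i8)
+ // --> (trunc (AssertZext X, i8) to i16)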
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::AssertSext &&
+ Opcode == ISD::AssertZext) {
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
+ "Asserting zero/sign-extended bits to a type larger than the "
+ "truncated destination does not provide information");
+
+ if (AssertVT.bitsLT(BigA_AssertVT)) {
+ SDLoc DL(N);
+ SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+ BigA.getOperand(0), N1);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
+ }
+ }
+
+ return SDValue();
+}
+
+/// If the result of a wider load is shifted right by N bits and then truncated
+/// to a narrower type, where N is a multiple of the number of bits in the
+/// narrower type, transform it to a narrower load from address + N / (num bits
+/// of new type). Also narrow the load if the result is masked with an AND that
+/// effectively produces a smaller type. If the result is to be extended, also
+/// fold the extension to form an extending load.
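+/// For example, on a little-endian target (illustrative, subject to the
+/// legality checks below):
+///   (i16 (trunc (srl (i32 (load p)), 16))) -> (i16 (load (p + 2)))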
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+ unsigned ShAmt = 0;
+ bool HasShiftedOffset = false;
+ // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+ // sign-extending back to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ } else if (Opc == ISD::SRL) {
+ // Another special case: SRL is basically zero-extending a narrower value,
+ // or it may be shifting a higher subword, half or byte into the lowest
+ // bits.
+ ExtType = ISD::ZEXTLOAD;
+ N0 = SDValue(N, 0);
+
+ auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
+ auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01 || !LN0)
+ return SDValue();
+
+ uint64_t ShiftAmt = N01->getZExtValue();
+ uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
+ if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
+ else
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - ShiftAmt);
+ } else if (Opc == ISD::AND) {
+ // An AND with a constant mask is the same as a truncate + zero-extend.
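+ // e.g. a mask of 255 makes (and (load i32 p), 255) equivalent to
+ // (zextload i8 p to i32).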
+ auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!AndC)
+ return SDValue();
+
+ const APInt &Mask = AndC->getAPIntValue();
+ unsigned ActiveBits = 0;
+ if (Mask.isMask()) {
+ ActiveBits = Mask.countTrailingOnes();
+ } else if (Mask.isShiftedMask()) {
+ ShAmt = Mask.countTrailingZeros();
+ APInt ShiftedMask = Mask.lshr(ShAmt);
+ ActiveBits = ShiftedMask.countTrailingOnes();
+ HasShiftedOffset = true;
+ } else
+ return SDValue();
+
+ ExtType = ISD::ZEXTLOAD;
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ SDValue SRL = N0;
+ if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
+ ShAmt = ConstShift->getZExtValue();
+ unsigned EVTBits = ExtVT.getSizeInBits();
+ // Is the shift amount a multiple of the size of ExtVT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+ // Is the load width a multiple of the size of ExtVT?
+ if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
+ return SDValue();
+ }
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ auto *LN0 = cast<LoadSDNode>(N0);
+
+ // Because a SRL must be assumed to *need* to zero-extend the high bits
+ // (as opposed to anyext the high bits), we can't combine the zextload
+ // lowering of SRL and an sextload.
+ if (LN0->getExtensionType() == ISD::SEXTLOAD)
+ return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
+ return SDValue();
+
+ // If the SRL is only used by a masking AND, we may be able to adjust
+ // the ExtVT to make the AND redundant.
+ SDNode *Mask = *(SRL->use_begin());
+ if (Mask->getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Mask->getOperand(1))) {
+ const APInt &ShiftMask =
+ cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+ if (ShiftMask.isMask()) {
+ EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
+ ShiftMask.countTrailingOnes());
+ // If the mask is smaller, recompute the type.
+ if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
+ TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
+ ExtVT = MaskedVT;
+ }
+ }
+ }
+ }
+
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+ // If we haven't found a load, we can't narrow it.
+ if (!isa<LoadSDNode>(N0))
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ // Reducing the width of a volatile load is illegal. For atomics, we may be
+ // able to reduce the width provided we never widen again. (see D66309)
+ if (!LN0->isSimple() ||
+ !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
+ return SDValue();
+
+ auto AdjustBigEndianShift = [&](unsigned ShAmt) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ return LVTStoreBits - EVTStoreBits - ShAmt;
+ };
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (DAG.getDataLayout().isBigEndian())
+ ShAmt = AdjustBigEndianShift(ShAmt);
+
+ EVT PtrType = N0.getOperand(1).getValueType();
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDLoc DL(LN0);
+ // The original load itself didn't wrap, so an offset within it doesn't.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, DL, PtrType),
+ Flags);
+ AddToWorklist(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+ else
+ Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
+ NewAlign, LN0->getMemOperand()->getFlags(),
+ LN0->getAAInfo());
+
+ // Replace the old load's chain with the new load's chain.
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy(Result.getValueType());
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ // If the shift amount is as large as the result size (but, presumably,
+ // no larger than the source) then the useful bits of the result are
+ // zero; we can't simply return the shortened shift, because the result
+ // of that operation is undefined.
+ if (ShLeftAmt >= VT.getSizeInBits())
+ Result = DAG.getConstant(0, DL, VT);
+ else
+ Result = DAG.getNode(ISD::SHL, DL, VT,
+ Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
+ }
+
+ if (HasShiftedOffset) {
+ // Recalculate the shift amount after it has been altered to calculate
+ // the offset.
+ if (DAG.getDataLayout().isBigEndian())
+ ShAmt = AdjustBigEndianShift(ShAmt);
+
+ // We're using a shifted mask, so the load now has an offset. This means
+ // that data has been loaded into lower bytes than it would otherwise have
+ // been, so we need to shl the loaded data into the correct position in the
+ // register.
+ SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
+ Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ }
+
+ // Return the new loaded value.
+ return Result;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getScalarSizeInBits();
+ unsigned EVTBits = EVT.getScalarSizeInBits();
+
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // fold (sext_in_reg c1) -> c1
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
+ N0.getOperand(0), N1);
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough or if we know that x has more than 1 sign bit and the
+ // sign_extend_inreg is extending from one of them.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ unsigned N00Bits = N00.getScalarValueSizeInBits();
+ if ((N00Bits <= EVTBits ||
+ (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
+ }
+
+ // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
+ if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
+ N0.getOperand(0));
+ }
+
+ // fold (sext_in_reg (zext x)) -> (sext x)
+ // iff we are extending the source sign bit.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getScalarValueSizeInBits() == EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+ if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
+ return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ // If sextload is not supported by target, we can only do the combine when
+ // load has one use. Doing otherwise can block folding the extload with other
+ // extends that the target does support.
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
+ N0.hasOneUse()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), EVT,
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorklist(ExtLoad.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), EVT,
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+ if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
+ BSwap, N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+
+ // noop truncate
+ if (SrcVT == VT)
+ return N0;
+
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+
+ // fold (truncate c1) -> c1
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
+ if (C.getNode() != N)
+ return C;
+ }
+
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ // if the source is smaller than the dest, we still need an extend.
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+ // if the source is larger than the dest, then we just need the truncate.
+ if (N0.getOperand(0).getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
+ }
+
+ // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
+ if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
+ return SDValue();
+
+ // Fold extract-and-trunc into a narrow extract. For example:
+ // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+ // i32 y = TRUNCATE(i64 x)
+ // -- becomes --
+ // v16i8 b = BITCAST (v2i64 val)
+ // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+ //
+ // Note: We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ unsigned NumElem = VecTy.getVectorNumElements();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
+ DAG.getBitcast(NVT, N0.getOperand(0)),
+ DAG.getConstant(Index, DL, IndexTy));
+ }
+ }
+
+ // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
+ if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
+ TLI.isTruncateFree(SrcVT, VT)) {
+ SDLoc SL(N0);
+ SDValue Cond = N0.getOperand(0);
+ SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
+ }
+ }
+
+ // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
+ if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
+ TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
+ SDValue Amt = N0.getOperand(1);
+ KnownBits Known = DAG.computeKnownBits(Amt);
+ unsigned Size = VT.getScalarSizeInBits();
+ if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ SDLoc SL(N);
+ EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ if (AmtVT != Amt.getValueType()) {
+ Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
+ AddToWorklist(Amt.getNode());
+ }
+ return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
+ }
+ }
+
+ // Attempt to pre-truncate BUILD_VECTOR sources.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+ SDLoc DL(N);
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 8> TruncOps;
+ for (const SDValue &Op : N0->op_values()) {
+ SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
+ TruncOps.push_back(TruncOp);
+ }
+ return DAG.getBuildVector(VT, DL, TruncOps);
+ }
+
+ // Fold a series of buildvector, bitcast, and truncate if possible.
+ // For example fold
+ // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+ // (2xi32 (buildvector x, y)).
+ if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+ N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getOperand(0).hasOneUse()) {
+ SDValue BuildVect = N0.getOperand(0);
+ EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+ EVT TruncVecEltTy = VT.getVectorElementType();
+
+ // Check that the element types match.
+ if (BuildVectEltTy == TruncVecEltTy) {
+ // Now we only need to compute the offset of the truncated elements.
+ unsigned BuildVecNumElts = BuildVect.getNumOperands();
+ unsigned TruncVecNumElts = VT.getVectorNumElements();
+ unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+ assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+ "Invalid number of elements");
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ Opnds.push_back(BuildVect.getOperand(i));
+
+ return DAG.getBuildVector(VT, SDLoc(N), Opnds);
+ }
+ }
+
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used.
+ // For example, "trunc (or (shl x, 8), y)" -> "trunc y".
+ // Currently we only perform this optimization on scalars because vectors
+ // may have different active low bits.
+ if (!VT.isVector()) {
+ APInt Mask =
+ APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
+ if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
+ }
+
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+ if (SDValue Reduced = ReduceLoadWidth(N))
+ return Reduced;
+
+ // Handle the case where the load remains an extending load even
+ // after truncation.
+ if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (LN0->isSimple() &&
+ LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
+ SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getMemoryVT(),
+ LN0->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
+ return NewLoad;
+ }
+ }
+ }
+
+ // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
+ // where ... are all 'undef'.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+ SmallVector<EVT, 8> VTs;
+ SDValue V;
+ unsigned Idx = 0;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue X = N0.getOperand(i);
+ if (!X.isUndef()) {
+ V = X;
+ Idx = i;
+ NumDefs++;
+ }
+ // Stop if more than one member is non-undef.
+ if (NumDefs > 1)
+ break;
+ VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ X.getValueType().getVectorNumElements()));
+ }
+
+ if (NumDefs == 0)
+ return DAG.getUNDEF(VT);
+
+ if (NumDefs == 1) {
+ assert(V.getNode() && "The single defined operand is empty!");
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (i != Idx) {
+ Opnds.push_back(DAG.getUNDEF(VTs[i]));
+ continue;
+ }
+ SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
+ AddToWorklist(NV.getNode());
+ Opnds.push_back(NV);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
+ }
+ }
+
+ // Fold truncate of a bitcast of a vector to an extract of the low vector
+ // element.
+ //
+ // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
+ if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
+ SDValue VecSrc = N0.getOperand(0);
+ EVT SrcVT = VecSrc.getValueType();
+ if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
+ SDLoc SL(N);
+
+ EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
+ VecSrc, DAG.getConstant(Idx, SL, IdxVT));
+ }
+ }
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
+ // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
+ // When the adde's carry is not used.
+ if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
+ N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
+ // We only do for addcarry before legalize operation
+ ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
+ TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ SDLoc SL(N);
+ auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ auto VTs = DAG.getVTList(VT, N0->getValueType(1));
+ return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
+ }
+
+ // fold (truncate (extract_subvector(ext x))) ->
+ // (extract_subvector x)
+ // TODO: This can be generalized to cover cases where the truncate and extract
+ // do not fully cancel each other out.
+ if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND ||
+ N00.getOpcode() == ISD::ANY_EXTEND) {
+ if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
+ VT.getVectorElementType())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
+ N00.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ // Narrow a suitable binary operation with a non-opaque constant operand by
+ // moving it ahead of the truncate. This is limited to pre-legalization
+ // because targets may prefer a wider type during later combines and invert
+ // this transform.
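+ // e.g. (trunc (add i64 X, 5) to i32) --> (add (trunc X to i32), 5)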
+ switch (N0.getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ if (!LegalOperations && N0.hasOneUse() &&
+ (isConstantOrConstantVector(N0.getOperand(0), true) ||
+ isConstantOrConstantVector(N0.getOperand(1), true))) {
+ // TODO: We already restricted this to pre-legalization, but for vectors
+ // we are extra cautious to not create an unsupported operation.
+ // Target-specific changes are likely needed to avoid regressions here.
+ if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
+ SDLoc DL(N);
+ SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
+ SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
+ return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// build_pair (load, load) -> load
+/// if load locations are consecutive.
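+/// e.g. on a little-endian target:
+///   (build_pair (i32 (load p)), (i32 (load p+4))) -> (i64 (load p))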
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+ assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+
+ // A BUILD_PAIR always has the least significant part in elt 0 and the most
+ // significant part in elt 1, so when combining into one large load we need
+ // to consider the endianness.
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LD1, LD2);
+
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getAddressSpace() != LD2->getAddressSpace())
+ return SDValue();
+ EVT LD1VT = LD1->getValueType(0);
+ unsigned LD1Bytes = LD1VT.getStoreSize();
+ if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
+ unsigned Align = LD1->getAlignment();
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign <= Align &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
+ LD1->getPointerInfo(), Align);
+ }
+
+ return SDValue();
+}
+
+static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
+ // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
+ // and Lo parts; on big-endian machines it doesn't.
+ return DAG.getDataLayout().isBigEndian() ? 1 : 0;
+}
+
+static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // If this is not a bitcast to an FP type or if the target doesn't have
+ // IEEE754-compliant FP logic, we're done.
+ EVT VT = N->getValueType(0);
+ if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
+ return SDValue();
+
+ // TODO: Handle cases where the integer constant is a different scalar
+ // bitwidth to the FP.
+ SDValue N0 = N->getOperand(0);
+ EVT SourceVT = N0.getValueType();
+ if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
+ return SDValue();
+
+ unsigned FPOpcode;
+ APInt SignMask;
+ switch (N0.getOpcode()) {
+ case ISD::AND:
+ FPOpcode = ISD::FABS;
+ SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
+ break;
+ case ISD::XOR:
+ FPOpcode = ISD::FNEG;
+ SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
+ break;
+ case ISD::OR:
+ FPOpcode = ISD::FABS;
+ SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
+ break;
+ default:
+ return SDValue();
+ }
+
+ // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
+ // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
+ // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
+ // fneg (fabs X)
+ SDValue LogicOp0 = N0.getOperand(0);
+ ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
+ if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
+ LogicOp0.getOpcode() == ISD::BITCAST &&
+ LogicOp0.getOperand(0).getValueType() == VT) {
+ SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
+ NumFPLogicOpsConv++;
+ if (N0.getOpcode() == ISD::OR)
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
+ return FPOp;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize types, unless both types are integer and the
+ // scalar type is legal. Only do this before legalize ops, since the target
+ // maybe depending on the bitcast.
+ // First check to see if this is all constant.
+ // TODO: Support FP bitcasts after legalize types.
+ if (VT.isVector() &&
+ (!LegalTypes ||
+ (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
+ TLI.isTypeLegal(VT.getVectorElementType()))) &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ cast<BuildVectorSDNode>(N0)->isConstant())
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
+ VT.getVectorElementType());
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ // If we can't allow illegal operations, we need to check that this is just
+ // an fp -> int or int -> fp conversion and that the resulting operation
+ // will be legal.
+ if (!LegalOperations ||
+ (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::Constant, VT))) {
+ SDValue C = DAG.getBitcast(VT, N0);
+ if (C.getNode() != N)
+ return C;
+ }
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getBitcast(VT, N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not remove the cast if the types differ in endian layout.
+ TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
+ TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
+ // If the load is volatile, we only want to change the load type if the
+ // resulting load is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
+ TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+ *LN0->getMemOperand())) {
+ SDValue Load =
+ DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), LN0->getAlignment(),
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+
+ if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
+ return V;
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fneg x)) ->
+ // flipbit = signbit
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ //
+ // fold (bitcast (fabs x)) ->
+ // flipbit = (and (extract_element (bitcast x), 0), signbit)
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ // This often reduces constant pool loads.
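+  // E.g. for f64, (bitcast (fneg X)) becomes (xor (bitcast X),
+  // 0x8000000000000000) and (bitcast (fabs X)) becomes (and (bitcast X),
+  // 0x7fffffffffffffff); the masks are plain integer immediates rather than
+  // FP constants that may need a constant pool load.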
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
+ N0.getNode()->hasOneUse() && VT.isInteger() &&
+ !VT.isVector() && !N0.getValueType().isVector()) {
+ SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
+ AddToWorklist(NewConv.getNode());
+
+ SDLoc DL(N);
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ assert(VT.getSizeInBits() == 128);
+ SDValue SignBit = DAG.getConstant(
+ APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
+ SDValue FlipBit;
+ if (N0.getOpcode() == ISD::FNEG) {
+ FlipBit = SignBit;
+ AddToWorklist(FlipBit.getNode());
+ } else {
+ assert(N0.getOpcode() == ISD::FABS);
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(NewConv)));
+ AddToWorklist(Hi.getNode());
+ FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
+ AddToWorklist(FlipBit.getNode());
+ }
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
+ }
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, DL, VT,
+ NewConv, DAG.getConstant(SignBit, DL, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, DL, VT,
+ NewConv, DAG.getConstant(~SignBit, DL, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fcopysign cst, x)) ->
+ // flipbit = (and (extract_element
+ // (xor (bitcast cst), (bitcast x)), 0),
+ // signbit)
+ // (xor (bitcast cst) (build_pair flipbit, flipbit))
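+  // E.g. for f32, (bitcast (fcopysign 1.0, x)) becomes
+  //   (or (and (bitcast x), 0x80000000), 0x3f800000)
+  // since 0x3f800000 is the representation of 1.0 with the sign bit clear.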
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+ VT.isInteger() && !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
+ if (isTypeLegal(IntXVT)) {
+ SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
+ AddToWorklist(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
+ AddToWorklist(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
+ SDLoc DL(X);
+ X = DAG.getNode(ISD::SRL, DL,
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, DL,
+ X.getValueType()));
+ AddToWorklist(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
+ AddToWorklist(X.getNode());
+ }
+
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
+ SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
+ AddToWorklist(Cst.getNode());
+ SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
+ AddToWorklist(X.getNode());
+ SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
+ AddToWorklist(XorResult.getNode());
+ SDValue XorResult64 = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(XorResult)));
+ AddToWorklist(XorResult64.getNode());
+ SDValue FlipBit =
+ DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
+ DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
+ AddToWorklist(FlipBit.getNode());
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
+ }
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, SDLoc(X), VT,
+ X, DAG.getConstant(SignBit, SDLoc(X), VT));
+ AddToWorklist(X.getNode());
+
+ SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
+ Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
+ AddToWorklist(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR)
+ if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
+ return CombineLD;
+
+ // Remove double bitcasts from shuffles - this is often a legacy of
+ // XformToShuffleWithZero being used to combine bitmaskings (of
+ // float vectors bitcast to integer vectors) into shuffles.
+ // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
+ N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
+ VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
+ !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
+
+    // If an operand is a bitcast, peek through it if it casts from the
+    // original VT. If an operand is a constant, just bitcast it back to the
+    // original VT.
+ auto PeekThroughBitcast = [&](SDValue Op) {
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getOperand(0).getValueType() == VT)
+ return SDValue(Op.getOperand(0));
+ if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ return DAG.getBitcast(VT, Op);
+ return SDValue();
+ };
+
+ // FIXME: If either input vector is bitcast, try to convert the shuffle to
+ // the result type of this bitcast. This would eliminate at least one
+ // bitcast. See the transform in InstCombine.
+ SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
+ SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
+ if (!(SV0 && SV1))
+ return SDValue();
+
+ int MaskScale =
+ VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
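+    // E.g. bitcasting a shuffle of v4i32 to v8i16 gives MaskScale == 2, so
+    // each v4i32 mask element M expands to the v8i16 elements 2*M and 2*M+1
+    // (undef elements stay -1).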
+ SmallVector<int, 8> NewMask;
+ for (int M : SVN->getMask())
+ for (int i = 0; i != MaskScale; ++i)
+ NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
+
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
+/// operands. DstEltVT indicates the destination element value type.
+SDValue DAGCombiner::
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ SmallVector<SDValue, 8> Ops;
+ for (SDValue Op : BV->op_values()) {
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
+ Ops.push_back(DAG.getBitcast(DstEltVT, Op));
+ AddToWorklist(Ops.back().getNode());
+ }
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+ return DAG.getBuildVector(VT, SDLoc(BV), Ops);
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector whose elements have
+    // the same size.
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+  // Now we know the input is an integer vector. If the output is an FP type,
+ // convert to integer first, then to FP of the right size.
+ if (DstEltVT.isFloatingPoint()) {
+ EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+ SDLoc DL(BV);
+
+  // Okay, we know the src/dst types are both integers of differing sizes.
+  // Handle the growing case first.
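+  // For example, growing <2 x i16> -> <1 x i32> on a little-endian target
+  // packs the elements (0x1122, 0x3344) into the single constant 0x33441122.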
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+ if (SrcBitSize < DstBitSize) {
+ unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+ i += NumInputsPerOutput) {
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+ APInt NewBits = APInt(DstBitSize, 0);
+ bool EltIsUndef = true;
+ for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+ // Shift the previously computed bits over.
+ NewBits <<= SrcBitSize;
+ SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+ if (Op.isUndef()) continue;
+ EltIsUndef = false;
+
+ NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
+ zextOrTrunc(SrcBitSize).zext(DstBitSize);
+ }
+
+ if (EltIsUndef)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
+ }
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+
+ // Finally, this must be the case where we are shrinking elements: each input
+ // turns into multiple outputs.
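+  // For example, shrinking <1 x i32> -> <2 x i16> splits 0x33441122 into
+  // (0x1122, 0x3344) on little-endian; the pieces are reversed on big-endian.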
+ unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ NumOutputsPerInput*BV->getNumOperands());
+ SmallVector<SDValue, 8> Ops;
+
+ for (const SDValue &Op : BV->op_values()) {
+ if (Op.isUndef()) {
+ Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
+ continue;
+ }
+
+ APInt OpVal = cast<ConstantSDNode>(Op)->
+ getAPIntValue().zextOrTrunc(SrcBitSize);
+
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+ APInt ThisVal = OpVal.trunc(DstBitSize);
+ Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
+ OpVal.lshrInPlace(DstBitSize);
+ }
+
+ // For big endian targets, swap the order of the pieces of each element.
+ if (DAG.getDataLayout().isBigEndian())
+ std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+ }
+
+ return DAG.getBuildVector(VT, DL, Ops);
+}
+
+static bool isContractable(SDNode *N) {
+ SDNodeFlags F = N->getFlags();
+ return F.hasAllowContract() || F.hasAllowReassociation();
+}
+
+/// Try to perform FMA combining on a given FADD node.
+SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ SDNodeFlags Flags = N->getFlags();
+ bool CanFuse = Options.UnsafeFPMath || isContractable(N);
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ CanFuse || HasFMAD);
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !isContractable(N))
+ return SDValue();
+
+ const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
+ if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+
+ // Is the node an FMUL and contractable either due to global flags or
+ // SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ return AllowFusionGlobally || isContractable(N.getNode());
+ };
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
+ if (N0.getNode()->use_size() > N1.getNode()->use_size())
+ std::swap(N0, N1);
+ }
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1), N1, Flags);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(0), N1.getOperand(1), N0, Flags);
+ }
+
+ // Look through FP_EXTEND nodes to do more combining.
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (isContractableFMUL(N00) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)), N1, Flags);
+ }
+ }
+
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (isContractableFMUL(N10) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)), N0, Flags);
+ }
+ }
+
+ // More folding opportunities when target permits.
+ if (Aggressive) {
+    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ if (CanFuse &&
+ N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ N1, Flags), Flags);
+ }
+
+    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
+ if (CanFuse &&
+ N1->getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL &&
+ N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(0), N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(2).getOperand(0),
+ N1.getOperand(2).getOperand(1),
+ N0, Flags), Flags);
+ }
+
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
+ SDNodeFlags Flags) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z, Flags), Flags);
+ };
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (isContractableFMUL(N020) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1, Flags);
+ }
+ }
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
+ SDNodeFlags Flags) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z, Flags), Flags);
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (isContractableFMUL(N002) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1, Flags);
+ }
+ }
+ }
+
+    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (isContractableFMUL(N120) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0, Flags);
+ }
+ }
+ }
+
+    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == PreferredFusedOpcode) {
+ SDValue N102 = N10.getOperand(2);
+ if (isContractableFMUL(N102) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0, Flags);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Try to perform FMA combining on a given FSUB node.
+SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ const SDNodeFlags Flags = N->getFlags();
+ bool CanFuse = Options.UnsafeFPMath || isContractable(N);
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ CanFuse || HasFMAD);
+
+ // If the subtraction is not contractable, do not combine.
+ if (!AllowFusionGlobally && !isContractable(N))
+ return SDValue();
+
+ const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
+ if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+
+ // Is the node an FMUL and contractable either due to global flags or
+ // SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ return AllowFusionGlobally || isContractable(N.getNode());
+ };
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0, Flags);
+ }
+
+  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
+ (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
+ }
+
+ // Look through FP_EXTEND nodes to do more combining.
+
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (isContractableFMUL(N00) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
+ }
+ }
+
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (isContractableFMUL(N10) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)),
+ N0, Flags);
+ }
+ }
+
+  // fold (fsub (fpext (fneg (fmul x, y))), z)
+  //   -> (fneg (fma (fpext x), (fpext y), z))
+  // Note: This could be removed with appropriate canonicalization of the
+  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+  // us from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (isContractableFMUL(N000) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1, Flags));
+ }
+ }
+ }
+
+  // fold (fsub (fneg (fpext (fmul x, y))), z)
+  //   -> (fneg (fma (fpext x), (fpext y), z))
+  // Note: This could be removed with appropriate canonicalization of the
+  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+  // us from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (isContractableFMUL(N000) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1, Flags));
+ }
+ }
+ }
+
+ // More folding opportunities when target permits.
+ if (Aggressive) {
+ // fold (fsub (fma x, y, (fmul u, v)), z)
+    //   -> (fma x, y, (fma u, v, (fneg z)))
+ if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
+ isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
+ N0.getOperand(2)->hasOneUse()) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1), Flags), Flags);
+ }
+
+ // fold (fsub x, (fma y, z, (fmul u, v)))
+ // -> (fma (fneg y), z, (fma (fneg u), v, x))
+ if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
+ isContractableFMUL(N1.getOperand(2))) {
+ SDValue N20 = N1.getOperand(2).getOperand(0);
+ SDValue N21 = N1.getOperand(2).getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N20),
+ N21, N0, Flags), Flags);
+ }
+
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (isContractableFMUL(N020) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1), Flags), Flags);
+ }
+ }
+ }
+
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (isContractableFMUL(N002) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1), Flags), Flags);
+ }
+ }
+ }
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (isContractableFMUL(N120) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1201),
+ N0, Flags), Flags);
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ SDValue CvtSrc = N1.getOperand(0);
+ SDValue N100 = CvtSrc.getOperand(0);
+ SDValue N101 = CvtSrc.getOperand(1);
+ SDValue N102 = CvtSrc.getOperand(2);
+ if (isContractableFMUL(N102) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1021),
+ N0, Flags), Flags);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Try to perform FMA combining on a given FMUL node based on the distributive
+/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
+/// subtraction instead of addition).
+SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+ const SDNodeFlags Flags = N->getFlags();
+
+ assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+  // The transforms below are incorrect when x == 0 and y == inf, because the
+  // intermediate multiplication produces a NaN.
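+  // For instance, with x == 0.0 and y == inf, (x + 1.0) * y == inf, but the
+  // fused form x*y + y == NaN + inf == NaN.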
+ if (!Options.NoInfsFPMath)
+ return SDValue();
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // Floating-point multiply-add with intermediate rounding. This can result
+ // in a less precise result due to the changed rounding order.
+ bool HasFMAD = Options.UnsafeFPMath &&
+ (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+
+ // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
+ // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
+ auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
+ if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
+ if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
+ if (C->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ }
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFADD(N0, N1, Flags))
+ return FMA;
+ if (SDValue FMA = FuseFADD(N1, N0, Flags))
+ return FMA;
+
+ // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
+ // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
+ // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
+ // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
+ auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
+ if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
+ if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
+ if (C0->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ Y, Flags);
+ if (C0->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ }
+ if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ if (C1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
+ }
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFSUB(N0, N1, Flags))
+ return FMA;
+ if (SDValue FMA = FuseFSUB(N1, N0, Flags))
+ return FMA;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags Flags = N->getFlags();
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (fadd c1, c2) -> c1 + c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
+
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+
+ // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
+ ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
+ if (N1C && N1C->isZero())
+    if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
+        Flags.hasNoSignedZeros())
+ return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // fold (fadd A, (fneg B)) -> (fsub A, B)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2)
+ return DAG.getNode(
+ ISD::FSUB, DL, VT, N0,
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
+
+ // fold (fadd (fneg A), B) -> (fsub B, A)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2)
+ return DAG.getNode(
+ ISD::FSUB, DL, VT, N1,
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);
+
+ auto isFMulNegTwo = [](SDValue FMul) {
+ if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
+ return false;
+ auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
+ return C && C->isExactlyValue(-2.0);
+ };
+
+ // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
+ if (isFMulNegTwo(N0)) {
+ SDValue B = N0.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
+ }
+ // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
+ if (isFMulNegTwo(N1)) {
+ SDValue B = N1.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
+ }
+
+  // No FP constant should be created after legalization as the Instruction
+  // Selection pass has a hard time dealing with FP constants.
+ bool AllowNewConst = (Level < AfterLegalizeDAG);
+
+ // If nnan is enabled, fold lots of things.
+ if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
+ // If allowed, fold (fadd (fneg x), x) -> 0.0
+ if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
+ return DAG.getConstantFP(0.0, DL, VT);
+
+ // If allowed, fold (fadd x, (fneg x)) -> 0.0
+ if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
+ return DAG.getConstantFP(0.0, DL, VT);
+ }
+
+ // If 'unsafe math' or reassoc and nsz, fold lots of things.
+ // TODO: break out portions of the transformations below for which Unsafe is
+ // considered and which do not require both nsz and reassoc
+ if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ AllowNewConst) {
+ // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
+ if (N1CFP && N0.getOpcode() == ISD::FADD &&
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+      SDValue NewC =
+          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
+ }
+
+ // We can fold chains of FADD's of the same value into multiplications.
+ // This transform is not safe in general because we are reducing the number
+ // of rounding steps.
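+    // E.g. (x + x) + x performs two rounding steps, while x * 3.0 performs
+    // one, so the two forms can differ in the last ulp; hence the
+    // reassociation requirement checked above.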
+ if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
+ if (N0.getOpcode() == ISD::FMUL) {
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
+
+ // (fadd (fmul x, c), x) -> (fmul x, c+1)
+ if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
+ }
+
+ // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
+ if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FMUL) {
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
+
+ // (fadd x, (fmul x, c)) -> (fmul x, c+1)
+ if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
+ }
+
+ // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
+ if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N0.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::FADD) {
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ // (fadd (fadd x, x), x) -> (fmul x, 3.0)
+ if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
+ (N0.getOperand(0) == N1)) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N1, DAG.getConstantFP(3.0, DL, VT), Flags);
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FADD) {
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
+ if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N0, DAG.getConstantFP(3.0, DL, VT), Flags);
+ }
+ }
+
+ // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
+ if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
+ DAG.getConstantFP(4.0, DL, VT), Flags);
+ }
+ }
+ } // enable-unsafe-fp-math
+
+ // FADD -> FMA combines:
+ if (SDValue Fused = visitFADDForFMACombine(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags Flags = N->getFlags();
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ // (fsub A, 0) -> A
+ if (N1CFP && N1CFP->isZero()) {
+ if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros()) {
+ return N0;
+ }
+ }
+
+ if (N0 == N1) {
+ // (fsub x, x) -> 0.0
+ if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
+ return DAG.getConstantFP(0.0f, DL, VT);
+ }
+
+ // (fsub -0.0, N1) -> -N1
+ // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
+ // FSUB does not specify the sign bit of a NaN. Also note that for
+ // the same reason, the inverse transform is not safe, unless fast math
+ // flags are in play.
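+  // E.g. with X == NaN: FNEG is a pure sign-bit flip, while FSUB(-0.0, NaN)
+  // may return the NaN with either sign bit, so rewriting FNEG as FSUB could
+  // change the sign of a NaN result.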
+ if (N0CFP && N0CFP->isZero()) {
+ if (N0CFP->isNegative() ||
+ (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
+ if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
+ return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
+ }
+ }
+
+ if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ N1.getOpcode() == ISD::FADD) {
+ // X - (X + Y) -> -Y
+ if (N0 == N1->getOperand(0))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
+ // X - (Y + X) -> -Y
+ if (N0 == N1->getOperand(1))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
+ }
+
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(
+ ISD::FADD, DL, VT, N0,
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
+
+ // FSUB -> FMA combines:
+ if (SDValue Fused = visitFSUBForFMACombine(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+
+ return SDValue();
+}
+
+/// Return true if both inputs are at least as cheap in negated form and at
+/// least one input is strictly cheaper in negated form.
+bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
+ if (char LHSNeg =
+ TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize))
+ if (char RHSNeg =
+ TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize))
+ // Both negated operands are at least as cheap as their counterparts.
+ // Check to see if at least one is cheaper negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return true;
+
+ return false;
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags Flags = N->getFlags();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ // This just handles C1 * C2 for vectors. Other vector folds are below.
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+ }
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
+
+ // canonicalize constant to RHS
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
+ (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
+ // fold (fmul A, 0) -> 0
+ if (N1CFP && N1CFP->isZero())
+ return N1;
+ }
+
+ if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
+ // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
+ if (isConstantFPBuildVectorOrConstantFP(N1) &&
+ N0.getOpcode() == ISD::FMUL) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ // Avoid an infinite loop by making sure that N00 is not a constant
+ // (the inner multiply has not been constant folded yet).
+ if (isConstantFPBuildVectorOrConstantFP(N01) &&
+ !isConstantFPBuildVectorOrConstantFP(N00)) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
+ }
+ }
+
+    // Match a special case: X * 2.0 has already been converted into
+    // (fadd X, X), so recognize that form here.
+    // fmul (fadd X, X), C -> fmul X, 2.0 * C
+ if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
+ N0.getOperand(0) == N0.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
+ }
+ }
+
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
+
+ // fold (fmul X, -1.0) -> (fneg X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0))
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT, N0);
+
+ // -N0 * -N1 --> N0 * N1
+ if (isCheaperToUseNegatedFPOps(N0, N1)) {
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
+ }
+
+ // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
+ // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
+ if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
+ (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
+ TLI.isOperationLegal(ISD::FABS, VT)) {
+ SDValue Select = N0, X = N1;
+ if (Select.getOpcode() != ISD::SELECT)
+ std::swap(Select, X);
+
+ SDValue Cond = Select.getOperand(0);
+ auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
+ auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
+
+ if (TrueOpnd && FalseOpnd &&
+ Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
+ isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
+ cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ std::swap(TrueOpnd, FalseOpnd);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
+ TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ DAG.getNode(ISD::FABS, DL, VT, X));
+ if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
+ return DAG.getNode(ISD::FABS, DL, VT, X);
+
+ break;
+ }
+ }
+ }
+
+ // FMUL -> FMA combines:
+ if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+ // FMA nodes have flags that propagate to the created nodes.
+ const SDNodeFlags Flags = N->getFlags();
+ bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
+
+ // Constant fold FMA.
+ if (isa<ConstantFPSDNode>(N0) &&
+ isa<ConstantFPSDNode>(N1) &&
+ isa<ConstantFPSDNode>(N2)) {
+ return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
+ }
+
+ // (-N0 * -N1) + N2 --> (N0 * N1) + N2
+ if (isCheaperToUseNegatedFPOps(N0, N1)) {
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
+ }
+
+ if (UnsafeFPMath) {
+ if (N0CFP && N0CFP->isZero())
+ return N2;
+ if (N1CFP && N1CFP->isZero())
+ return N2;
+ }
+ // TODO: The FMA node should have flags that propagate to these nodes.
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
+
+ // Canonicalize (fma c, x, y) -> (fma x, c, y)
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
+
+ if (UnsafeFPMath) {
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
+ Flags), Flags);
+ }
+
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (N0.getOpcode() == ISD::FMUL &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ return DAG.getNode(ISD::FMA, DL, VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
+ Flags),
+ N2);
+ }
+ }
+
+ // (fma x, 1, y) -> (fadd x, y)
+ // (fma x, -1, y) -> (fadd (fneg x), y)
+ if (N1CFP) {
+ if (N1CFP->isExactlyValue(1.0))
+ // TODO: The FMA node should have flags that propagate to this node.
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
+
+ if (N1CFP->isExactlyValue(-1.0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
+ SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
+ AddToWorklist(RHSNeg.getNode());
+ // TODO: The FMA node should have flags that propagate to this node.
+ return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
+ }
+
+    // fma (fneg x), K, y -> fma x, -K, y
+ if (N0.getOpcode() == ISD::FNEG &&
+ (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
+ ForCodeSize)))) {
+ return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
+ }
+ }
+
+ if (UnsafeFPMath) {
+ // (fma x, c, x) -> (fmul x, (c+1))
+ if (N1CFP && N0 == N2) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(1.0, DL, VT), Flags),
+ Flags);
+ }
+
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(-1.0, DL, VT), Flags),
+ Flags);
+ }
+ }
+
+ return SDValue();
+}
+
+// Combine multiple FDIVs with the same divisor into multiple FMULs by the
+// reciprocal.
+// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+// Notice that this is not always beneficial. One reason is that different
+// targets may have different costs for FDIV and FMUL, so sometimes the cost
+// of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
+// reason is that the critical path is increased from "one FDIV" to "one FDIV
+// + one FMUL".
+SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+ // TODO: Limit this transform based on optsize/minsize - it always creates at
+ // least 1 extra instruction. But the perf win may be substantial enough
+ // that only minsize should restrict this.
+ bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
+ const SDNodeFlags Flags = N->getFlags();
+ if (!UnsafeMath && !Flags.hasAllowReciprocal())
+ return SDValue();
+
+  // Skip if the current node is itself a reciprocal or a negated reciprocal.
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
+ if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
+ return SDValue();
+
+ // Exit early if the target does not want this transform or if there can't
+ // possibly be enough uses of the divisor to make the transform worthwhile.
+ SDValue N1 = N->getOperand(1);
+ unsigned MinUses = TLI.combineRepeatedFPDivisors();
+
+ // For splat vectors, scale the number of uses by the splat factor. If we can
+ // convert the division into a scalar op, that will likely be much faster.
+ unsigned NumElts = 1;
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() && DAG.isSplatValue(N1))
+ NumElts = VT.getVectorNumElements();
+
+ if (!MinUses || (N1->use_size() * NumElts) < MinUses)
+ return SDValue();
+
+ // Find all FDIV users of the same divisor.
+ // Use a set because duplicates may be present in the user list.
+ SetVector<SDNode *> Users;
+ for (auto *U : N1->uses()) {
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+ // This division is eligible for optimization only if global unsafe math
+ // is enabled or if this division allows reciprocal formation.
+ if (UnsafeMath || U->getFlags().hasAllowReciprocal())
+ Users.insert(U);
+ }
+ }
+
+ // Now that we have the actual number of divisor uses, make sure it meets
+ // the minimum threshold specified by the target.
+ if ((Users.size() * NumElts) < MinUses)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
+
+ // Dividend / Divisor -> Dividend * Reciprocal
+ for (auto *U : Users) {
+ SDValue Dividend = U->getOperand(0);
+ if (Dividend != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+ Reciprocal, Flags);
+ CombineTo(U, NewNode);
+ } else if (U != Reciprocal.getNode()) {
+ // In the absence of fast-math-flags, this user node is always the
+ // same node as Reciprocal, but with FMF they may be different nodes.
+ CombineTo(U, Reciprocal);
+ }
+ }
+ return SDValue(N, 0); // N was replaced.
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ SDNodeFlags Flags = N->getFlags();
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ if (SDValue V = combineRepeatedFPDivisors(N))
+ return V;
+
+ if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
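+    // E.g. X / 2.0 -> X * 0.5 is exact since 0.5 is representable, while
+    // X / 3.0 -> X * (1.0/3.0) rounds the reciprocal; both the exact (opOK)
+    // and rounded (opInexact) cases are accepted below.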
+ if (N1CFP) {
+ // Compute the reciprocal 1.0 / c2.
+ const APFloat &N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (eg NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(Recip, DL, VT), Flags);
+ }
+
+ // If this FDIV is part of a reciprocal square root, it may be folded
+ // into a target-specific square root estimate instruction.
+ if (N1.getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ } else if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
+ RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ }
+ } else if (N1.getOpcode() == ISD::FP_ROUND &&
+ N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
+ RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ }
+ } else if (N1.getOpcode() == ISD::FMUL) {
+ // Look through an FMUL. Even though this won't remove the FDIV directly,
+ // it's still worthwhile to get rid of the FSQRT if possible.
+ SDValue SqrtOp;
+ SDValue OtherOp;
+ if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ SqrtOp = N1.getOperand(0);
+ OtherOp = N1.getOperand(1);
+ } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
+ SqrtOp = N1.getOperand(1);
+ OtherOp = N1.getOperand(0);
+ }
+ if (SqrtOp.getNode()) {
+ // We found a FSQRT, so try to make this fold:
+ // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
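+      // The rsqrt estimate replaces the FSQRT, and the new (rsqrt(z) / y)
+      // FDIV built below may itself be combined into a reciprocal estimate
+      // when it is revisited from the worklist.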
+ if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
+ RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ }
+ }
+ }
+
+ // Fold into a reciprocal estimate and multiply instead of a real divide.
+ if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
+ return RV;
+ }
+
+ // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+ if (isCheaperToUseNegatedFPOps(N0, N1))
+ return DAG.getNode(
+ ISD::FDIV, SDLoc(N), VT,
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSQRT(SDNode *N) {
+ SDNodeFlags Flags = N->getFlags();
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ if (TLI.isFsqrtCheap(N0, DAG))
+ return SDValue();
+
+ // FSQRT nodes have flags that propagate to the created nodes.
+ return buildSqrtEstimate(N0, Flags);
+}
+
+/// copysign(x, fp_extend(y)) -> copysign(x, y)
+/// copysign(x, fp_round(y)) -> copysign(x, y)
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+ SDValue N1 = N->getOperand(1);
+ if ((N1.getOpcode() == ISD::FP_EXTEND ||
+ N1.getOpcode() == ISD::FP_ROUND)) {
+ // Do not optimize out type conversion of f128 type yet.
+    // For some targets like x86_64, the configuration has been changed to
+    // keep one f128 value in one SSE register, but instruction selection
+    // cannot yet handle FCOPYSIGN on SSE registers.
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0).getValueType();
+ return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+ }
+ return false;
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ EVT VT = N->getValueType(0);
+
+ if (N0CFP && N1CFP) // Constant fold
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
+
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
+ const APFloat &V = N1C->getValueAPF();
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ if (!V.isNegative()) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+ } else {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
+ }
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFPOW(SDNode *N) {
+ ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
+ if (!ExponentC)
+ return SDValue();
+
+ // Try to convert x ** (1/3) into cube root.
+ // TODO: Handle the various flavors of long double.
+ // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
+ // Some range near 1/3 should be fine.
+ EVT VT = N->getValueType(0);
+ if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
+ (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
+ // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
+ // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
+    // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf nnan afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+ if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Do not create a cbrt() libcall if the target does not have it, and do not
+ // turn a pow that has lowering support into a cbrt() libcall.
+ if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
+ (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
+ DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
+ return SDValue();
+
+ return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
+ }
+
+ // Try to convert x ** (1/4) and x ** (3/4) into square roots.
+ // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
+ // TODO: This could be extended (using a target hook) to handle smaller
+ // power-of-2 fractional exponents.
+ bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
+ bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
+ if (ExponentIs025 || ExponentIs075) {
+ // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
+ // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
+ // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
+ // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+
+ // We only need no signed zeros for the 0.25 case.
+ if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Don't double the number of libcalls. We are trying to inline fast code.
+ if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
+ return SDValue();
+
+ // Assume that libcalls are the smallest code.
+ // TODO: This restriction should probably be lifted for vectors.
+ if (DAG.getMachineFunction().getFunction().hasOptSize())
+ return SDValue();
+
+ // pow(X, 0.25) --> sqrt(sqrt(X))
+ SDLoc DL(N);
+ SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
+ SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ if (ExponentIs025)
+ return SqrtSqrt;
+ // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
+ return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
+ }
+
+ return SDValue();
+}
+
+static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // This optimization is guarded by a function attribute because it may produce
+  // unexpected results. I.e., programs may be relying on the platform-specific
+ // undefined behavior when the float-to-int conversion overflows.
+ const Function &F = DAG.getMachineFunction().getFunction();
+ Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
+ if (StrictOverflow.getValueAsString().equals("false"))
+ return SDValue();
+
+ // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
+ // replacing casts with a libcall. We also must be allowed to ignore -0.0
+  // because FTRUNC will return -0.0 for values in (-1.0, -0.0), but using
+  // integer conversions would return +0.0.
+ // FIXME: We should be able to use node-level FMF here.
+ // TODO: If strict math, should we use FABS (+ range check for signed cast)?
+ EVT VT = N->getValueType(0);
+ if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
+ !DAG.getTarget().Options.NoSignedZerosFPMath)
+ return SDValue();
+
+ // fptosi/fptoui round towards zero, so converting from FP to integer and
+ // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
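+  // e.g. with X = 2.7: fptosi gives 2, sitofp gives 2.0, and ftrunc(2.7) is
+  // also 2.0 (overflow aside, which is excluded above).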
+ SDValue N0 = N->getOperand(0);
+ if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
+ N0.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
+ N0.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
+
+ // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+ // but UINT_TO_FP is legal on this target, try to convert.
+ if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
+ hasOperation(ISD::UINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
+ }
+
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
+    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+    //      (select_cc x, y, 1.0, 0.0, cc)
+    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+        N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ SDValue Ops[] =
+ { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
+ N0.getOperand(0).getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
+ }
+ }
+
+ if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+ return FTrunc;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
+ hasOperation(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
+ }
+
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
+    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
+ }
+ }
+
+ if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+ return FTrunc;
+
+ return SDValue();
+}
+
+// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
+static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
+ return SDValue();
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+ bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+ unsigned ActualSize = std::min(InputSize, OutputSize);
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+ // We can only fold away the float conversion if the input range can be
+ // represented exactly in the float range.
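+  // e.g. sitofp i16 -> f32 -> fptosi i16: ActualSize = 15, which fits in
+  // f32's 24 bits of precision, so we can fold; for i32 the 31 value bits
+  // exceed 24 and we must bail.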
+ if (APFloat::semanticsPrecision(sem) >= ActualSize) {
+ if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
+ unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
+ }
+ if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
+ return DAG.getBitcast(VT, Src);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
+
+ return FoldIntToFPToInt(N, DAG);
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
+
+ return FoldIntToFPToInt(N, DAG);
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+ const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
+
+ // Skip this folding if it results in an fp_round from f80 to f16.
+ //
+    // f80 to f16 always generates an expensive (and as yet unimplemented)
+    // libcall to __truncxfhf2 instead of selecting native f16 conversion
+    // instructions from f32 or f64. Moreover, the first (value-preserving)
+    // fp_round from f80 to either f32 or f64 may become a NOP on platforms
+    // like x86.
+ if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
+ return SDValue();
+
+    // If the first fp_round isn't a value-preserving truncation, it might
+    // introduce a tie in the second fp_round that wouldn't occur in the
+    // single-step fp_round we want to fold to.
+    // In other words, rounding twice isn't the same as rounding once.
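+    // e.g. an f64 value that rounds to a tie at f32 precision can then
+    // round to f16 differently than a single f64->f16 rounding of the
+    // original value would.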
+    // Also, this is a value-preserving truncation iff both fp_rounds are.
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
+ }
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
+ N0.getOperand(0), N1);
+ AddToWorklist(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+  // If this is fp_round(fp_extend), don't fold it; allow ourselves to be folded.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
+
+ // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
+ if (N0.getOpcode() == ISD::FP16_TO_FP &&
+ TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
+
+  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
+ // value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
+ N0.getValueType(), ExtLoad,
+ DAG.getIntPtrConstant(1, SDLoc(N0))),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+
+ // fold ftrunc (known rounded int x) -> x
+  // ftrunc is a part of the fptosi/fptoui expansion on some targets, so this
+  // is likely to be generated to extract an integer from a rounded
+  // floating-point value.
+ switch (N0.getOpcode()) {
+ default: break;
+ case ISD::FRINT:
+ case ISD::FTRUNC:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ return N0;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+// FIXME: FNEG and FABS have a lot in common; refactor.
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Constant fold FNEG.
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
+
+ if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
+ return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+
+ // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
+ // constant pool values.
+ if (!TLI.isFNegFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST &&
+ N0.getNode()->hasOneUse()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For a vector, get a mask such as 0x80... per scalar element
+ // and splat it.
+ SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For a scalar, just generate 0x80...
+ SignMask = APInt::getSignMask(IntVT.getSizeInBits());
+ }
+ SDLoc DL0(N0);
+ Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
+ DAG.getConstant(SignMask, DL0, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getBitcast(VT, Int);
+ }
+ }
+
+ // (fneg (fmul c, x)) -> (fmul -c, x)
+ if (N0.getOpcode() == ISD::FMUL &&
+ (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
+ ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ if (CFP1) {
+ APFloat CVal = CFP1->getValueAPF();
+ CVal.changeSign();
+ if (Level >= AfterLegalizeDAG &&
+ (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT)))
+ return DAG.getNode(
+ ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
+ N0->getFlags());
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
+ APFloat (*Op)(const APFloat &, const APFloat &)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+
+ if (N0CFP && N1CFP) {
+ const APFloat &C0 = N0CFP->getValueAPF();
+ const APFloat &C1 = N1CFP->getValueAPF();
+ return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
+ }
+
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
+ return visitFMinMax(DAG, N, minnum);
+}
+
+SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
+ return visitFMinMax(DAG, N, maxnum);
+}
+
+SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
+ return visitFMinMax(DAG, N, minimum);
+}
+
+SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
+ return visitFMinMax(DAG, N, maximum);
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fabs c1) -> fabs(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
+
+ // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
+ if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For a vector, get a mask such as 0x7f... per scalar element
+ // and splat it.
+ SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For a scalar, just generate 0x7f...
+ SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
+ }
+ SDLoc DL(N0);
+ Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getBitcast(N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ N1.getOperand(0).getValueType())) {
+ return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ if (N1.hasOneUse()) {
+ if (SDValue NewN1 = rebuildSetCC(N1))
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::rebuildSetCC(SDValue N) {
+ if (N.getOpcode() == ISD::SRL ||
+ (N.getOpcode() == ISD::TRUNCATE &&
+ (N.getOperand(0).hasOneUse() &&
+ N.getOperand(0).getOpcode() == ISD::SRL))) {
+    // Look past the truncate.
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
+
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
+ SDValue Op0 = N.getOperand(0);
+ SDValue Op1 = N.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
+ SDLoc DL(N);
+ return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
+ ISD::SETNE);
+ }
+ }
+ }
+ }
+
+ // Transform br(xor(x, y)) -> br(x != y)
+ // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+ if (N.getOpcode() == ISD::XOR) {
+ // Because we may call this on a speculatively constructed
+ // SimplifiedSetCC Node, we need to simplify this node first.
+ // Ideally this should be folded into SimplifySetCC and not
+ // here. For now, grab a handle to N so we don't lose it from
+    // replacements internal to the visit.
+ HandleSDNode XORHandle(N);
+ while (N.getOpcode() == ISD::XOR) {
+ SDValue Tmp = visitXOR(N.getNode());
+ // No simplification done.
+ if (!Tmp.getNode())
+ break;
+      // Returning N is a form of in-visit replacement that may invalidate
+      // N. Grab the value from the handle.
+ if (Tmp.getNode() == N.getNode())
+ N = XORHandle.getValue();
+ else // Node simplified. Try simplifying again.
+ N = Tmp;
+ }
+
+ if (N.getOpcode() != ISD::XOR)
+ return N;
+
+ SDNode *TheXor = N.getNode();
+
+ SDValue Op0 = TheXor->getOperand(0);
+ SDValue Op1 = TheXor->getOperand(1);
+
+ if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+ bool Equal = false;
+ if (isOneConstant(Op0) && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
+
+ EVT SetCCVT = N.getValueType();
+ if (LegalTypes)
+ SetCCVT = getSetCCResultType(SetCCVT);
+ // Replace the uses of XOR with SETCC
+ return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
+ }
+ }
+
+ return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
+ CondLHS, CondRHS, CC->get(), SDLoc(N),
+ false);
+ if (Simp.getNode()) AddToWorklist(Simp.getNode());
+
+ // fold to a simpler setcc
+ if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
+ N->getOperand(0), Simp.getOperand(2),
+ Simp.getOperand(0), Simp.getOperand(1),
+ N->getOperand(4));
+
+ return SDValue();
+}
+
+/// Return true if 'Use' is a load or a store that uses N as its base pointer
+/// and that N may be folded in the load / store addressing mode.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ unsigned AS;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ AM.HasBaseReg = true;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ AM.HasBaseReg = true;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
+ VT.getTypeForEVT(*DAG.getContext()), AS);
+}
+
+/// Try turning a load/store into a pre-indexed load/store when the base
+/// pointer is an add or subtract and it has other uses besides the load/store.
+/// After the transformation, the new indexed load/store has effectively folded
+/// the add/subtract in and all of its other uses are redirected to the
+/// new load/store.
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.getNode()->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+
+ // Backends without true r+i pre-indexed forms may need to pass a
+ // constant base with a variable offset so that constant coercion
+ // will work with the patterns in canonical form.
+ bool Swapped = false;
+ if (isa<ConstantSDNode>(BasePtr)) {
+ std::swap(BasePtr, Offset);
+ Swapped = true;
+ }
+
+  // Don't create an indexed load / store with zero offset.
+ if (isNullConstant(Offset))
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+
+ // Would require a copy.
+ if (Val == BasePtr)
+ return false;
+
+ // Would create a cycle.
+ if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // Caches for hasPredecessorHelper.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(N);
+
+ // If the offset is a constant, there may be other adds of constants that
+ // can be folded with this one. We should do this to avoid having to keep
+ // a copy of the original base pointer.
+ SmallVector<SDNode *, 16> OtherUses;
+ if (isa<ConstantSDNode>(Offset))
+ for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
+ UE = BasePtr.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ // Skip the use that is Ptr and uses of other results from BasePtr's
+ // node (important for nodes that return multiple results).
+ if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
+ continue;
+
+ if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
+ continue;
+
+ if (Use.getUser()->getOpcode() != ISD::ADD &&
+ Use.getUser()->getOpcode() != ISD::SUB) {
+ OtherUses.clear();
+ break;
+ }
+
+ SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
+ if (!isa<ConstantSDNode>(Op1)) {
+ OtherUses.clear();
+ break;
+ }
+
+ // FIXME: In some cases, we can be smarter about this.
+ if (Op1.getValueType() != Offset.getValueType()) {
+ OtherUses.clear();
+ break;
+ }
+
+ OtherUses.push_back(Use.getUser());
+ }
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+
+ for (SDNode *Use : Ptr.getNode()->uses()) {
+ if (Use == N)
+ continue;
+ if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
+ return false;
+
+ // If Ptr may be folded in addressing mode of other use, then it's
+ // not profitable to do this transformation.
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
+ RealUse = true;
+ }
+
+ if (!RealUse)
+ return false;
+
+ SDValue Result;
+ if (isLoad)
+ Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
+ BasePtr, Offset, AM);
+ else
+ Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
+ BasePtr, Offset, AM);
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG); dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ deleteAndRecombine(N);
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Replace other uses of BasePtr that can be updated to use Ptr
+ for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
+ unsigned OffsetIdx = 1;
+ if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
+ OffsetIdx = 0;
+ assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
+ BasePtr.getNode() && "Expected BasePtr operand");
+
+ // We need to replace ptr0 in the following expression:
+ // x0 * offset0 + y0 * ptr0 = t0
+ // knowing that
+ // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
+ //
+ // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
+ // indexed load/store and the expression that needs to be re-written.
+ //
+ // Therefore, we have:
+    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
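+    //
+    // Worked instance: a pre-inc result t1 = ptr0 + 4 (x1 = y1 = 1,
+    // offset1 = 4) and another use t0 = ptr0 + 8 (ADD, x0 = y0 = 1)
+    // rewrite to t0 = t1 + (8 - 4) = t1 + 4.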
+
+ ConstantSDNode *CN =
+ cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
+ int X0, X1, Y0, Y1;
+ const APInt &Offset0 = CN->getAPIntValue();
+ APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
+
+ X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
+ Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
+ X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
+ Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
+
+ unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
+
+ APInt CNV = Offset0;
+ if (X0 < 0) CNV = -CNV;
+ if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
+ else CNV = CNV - Offset1;
+
+ SDLoc DL(OtherUses[i]);
+
+ // We can now generate the new expression.
+ SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
+ SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
+
+ SDValue NewUse = DAG.getNode(Opcode,
+ DL,
+ OtherUses[i]->getValueType(0), NewOp1, NewOp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
+ deleteAndRecombine(OtherUses[i]);
+ }
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
+ deleteAndRecombine(Ptr.getNode());
+ AddToWorklist(Result.getNode());
+
+ return true;
+}
+
+/// Try to combine a load/store with an add/sub of the base pointer node into a
+/// post-indexed load/store. The transformation effectively folds the
+/// add/subtract into the new indexed load/store, and all of its uses are
+/// redirected to the new load/store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ if (Ptr.getNode()->hasOneUse())
+ return false;
+
+ for (SDNode *Op : Ptr.getNode()->uses()) {
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+      // Don't create an indexed load / store with zero offset.
+ if (isNullConstant(Offset))
+ continue;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr (and
+      //    it may be folded into the addressing mode).
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ continue;
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode *Use : BasePtr.getNode()->uses()) {
+ if (Use == Ptr.getNode())
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
+ bool RealUse = false;
+ for (SDNode *UseUse : Use->uses()) {
+ if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+
+ if (TryNext)
+ continue;
+
+ // Check for #2.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ // Ptr is predecessor to both N and Op.
+ Visited.insert(Ptr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(Op);
+ if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
+ !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
+ SDValue Result = isLoad
+ ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
+ BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
+ dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ deleteAndRecombine(N);
+
+ // Replace the uses of Use with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(isLoad ? 1 : 0));
+ deleteAndRecombine(Op);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// Return the base-pointer arithmetic from an indexed \p LD.
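+/// e.g. a pre- or post-increment load with base BP and increment Inc is
+/// split back into (add BP, Inc); the decrement forms become (sub BP, Inc).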
+SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ assert(AM != ISD::UNINDEXED);
+ SDValue BP = LD->getOperand(1);
+ SDValue Inc = LD->getOperand(2);
+
+ // Some backends use TargetConstants for load offsets, but don't expect
+ // TargetConstants in general ADD nodes. We can convert these constants into
+ // regular Constants (if the constant is not opaque).
+ assert((Inc.getOpcode() != ISD::TargetConstant ||
+ !cast<ConstantSDNode>(Inc)->isOpaque()) &&
+ "Cannot split out indexing using opaque target constants");
+ if (Inc.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
+ Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
+ ConstInc->getValueType(0));
+ }
+
+ unsigned Opc =
+ (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
+ return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
+}
+
+static inline int numVectorEltsOrZero(EVT T) {
+ return T.isVector() ? T.getVectorNumElements() : 0;
+}
+
+bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
+ Val = ST->getValue();
+ EVT STType = Val.getValueType();
+ EVT STMemType = ST->getMemoryVT();
+ if (STType == STMemType)
+ return true;
+ if (isTypeLegal(STMemType))
+ return false; // fail.
+ if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
+ TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
+ Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
+ STType.isInteger() && STMemType.isInteger()) {
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
+ Val = DAG.getBitcast(STMemType, Val);
+ return true;
+ }
+ return false; // fail.
+}
+
+bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
+ EVT LDMemType = LD->getMemoryVT();
+ EVT LDType = LD->getValueType(0);
+ assert(Val.getValueType() == LDMemType &&
+ "Attempting to extend value of non-matching type");
+ if (LDType == LDMemType)
+ return true;
+ if (LDMemType.isInteger() && LDType.isInteger()) {
+ switch (LD->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ Val = DAG.getBitcast(LDType, Val);
+ return true;
+ case ISD::EXTLOAD:
+ Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::SEXTLOAD:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::ZEXTLOAD:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ }
+ }
+ return false;
+}
+
+SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
+ if (OptLevel == CodeGenOpt::None || !LD->isSimple())
+ return SDValue();
+ SDValue Chain = LD->getOperand(0);
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
+ // TODO: Relax this restriction for unordered atomics (see D66309)
+ if (!ST || !ST->isSimple())
+ return SDValue();
+
+ EVT LDType = LD->getValueType(0);
+ EVT LDMemType = LD->getMemoryVT();
+ EVT STMemType = ST->getMemoryVT();
+ EVT STType = ST->getValue().getValueType();
+
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
+ int64_t Offset;
+ if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ return SDValue();
+
+  // Normalize for endianness. After this, Offset=0 will denote that the least
+  // significant bit in the loaded value maps to the least significant bit in
+  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
+  // n:th least significant byte of the stored value.
+ if (DAG.getDataLayout().isBigEndian())
+ Offset = (STMemType.getStoreSizeInBits() -
+ LDMemType.getStoreSizeInBits()) / 8 - Offset;
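+  // e.g. for a 1-byte load at the base address of a 4-byte store on a
+  // big-endian target, Offset becomes (32 - 8) / 8 - 0 = 3: the load reads
+  // byte 3, the most significant byte of the stored value.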
+
+  // Check that the stored value covers all bits that are loaded.
+ bool STCoversLD =
+ (Offset >= 0) &&
+ (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
+
+ auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
+ if (LD->isIndexed()) {
+ bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
+ LD->getAddressingMode() == ISD::POST_DEC);
+ unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
+ SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
+ LD->getOperand(1), LD->getOperand(2));
+ SDValue Ops[] = {Val, Idx, Chain};
+ return CombineTo(LD, Ops, 3);
+ }
+ return CombineTo(LD, Val, Chain);
+ };
+
+ if (!STCoversLD)
+ return SDValue();
+
+ // Memory as copy space (potentially masked).
+ if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
+ // Simple case: Direct non-truncating forwarding
+ if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
+ return ReplaceLd(LD, ST->getValue(), Chain);
+ // Can we model the truncate and extension with an and mask?
+ if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
+ !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
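+      // e.g. a truncating i32->i16 store followed by a zext-loading
+      // i16->i32 load of the same address forwards the stored value as:
+      //   Val = and i32 StoredVal, 0xFFFF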
+ // Mask to size of LDMemType
+ auto Mask =
+ DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
+ STMemType.getSizeInBits()),
+ SDLoc(ST), STType);
+ auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
+ return ReplaceLd(LD, Val, Chain);
+ }
+ }
+
+ // TODO: Deal with nonzero offset.
+ if (LD->getBasePtr().isUndef() || Offset != 0)
+ return SDValue();
+  // Model necessary truncations / extensions.
+  SDValue Val;
+  // Truncate the value to the stored memory size.
+ do {
+ if (!getTruncatedStoreValue(ST, Val))
+ continue;
+ if (!isTypeLegal(LDMemType))
+ continue;
+ if (STMemType != LDMemType) {
+ // TODO: Support vectors? This requires extract_subvector/bitcast.
+ if (!STMemType.isVector() && !LDMemType.isVector() &&
+ STMemType.isInteger() && LDMemType.isInteger())
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
+ else
+ continue;
+ }
+ if (!extendLoadedValueToExtension(LD, Val))
+ continue;
+ return ReplaceLd(LD, Val, Chain);
+ } while (false);
+
+ // On failure, cleanup dead nodes we may have created.
+ if (Val->use_empty())
+ deleteAndRecombine(Val.getNode());
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+  // If the load is simple and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ // TODO: Allow this for unordered atomics (see D66309)
+ if (LD->isSimple()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (!N->hasAnyUseOfValue(0)) {
+ // It's not safe to use the two value CombineTo variant here. e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
+ dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ AddUsersToWorklist(Chain.getNode());
+ if (N->use_empty())
+ deleteAndRecombine(N);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+
+ // If this load has an opaque TargetConstant offset, then we cannot split
+ // the indexing into an add/sub directly (that TargetConstant may not be
+ // valid for a different type of node, and we cannot convert an opaque
+ // target constant into a regular constant).
+ bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
+ cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
+
+ if (!N->hasAnyUseOfValue(0) &&
+ ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ SDValue Index;
+ if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
+ Index = SplitIndexingFromLoad(LD);
+ // Try to fold the base pointer arithmetic into subsequent loads and
+ // stores.
+ AddUsersToWorklist(N);
+ } else
+ Index = DAG.getUNDEF(N->getValueType(1));
+ LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
+ dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
+ deleteAndRecombine(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ if (auto V = ForwardStoreValueToDirectLoad(LD))
+ return V;
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
+ SDValue NewLoad = DAG.getExtLoad(
+ LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), Align,
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ // NewLoad will always be N as we are only refining the alignment
+ assert(NewLoad.getNode() == N);
+ (void)NewLoad;
+ }
+ }
+ }
+
+ if (LD->isUnindexed()) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(LD, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+      // Replace the chain to avoid a dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
+ BetterChain, Ptr, LD->getMemOperand());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getMemoryVT(),
+ LD->getMemOperand());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Replace uses with load result and token factor
+ return CombineTo(N, ReplLoad.getValue(0), Token);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // Try to slice up N to more direct loads if the slices are mapped to
+ // different register banks or pairing can take place.
+ if (SliceUpLoad(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+namespace {
+
+/// Helper structure used to slice a load in smaller loads.
+/// Basically a slice is obtained from the following sequence:
+/// Origin = load Ty1, Base
+/// Shift = srl Ty1 Origin, CstTy Amount
+/// Inst = trunc Shift to Ty2
+///
+/// Then, it will be rewritten into:
+/// Slice = load SliceTy, Base + SliceOffset
+/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
+///
+/// SliceTy is deduced from the number of bits that are actually used to
+/// build Inst.
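+///
+/// For example, on a little-endian target, with
+///   Origin = load i32, Base
+///   Shift = srl i32 Origin, 16
+///   Inst = trunc Shift to i8
+/// the rewritten slice is: Slice = load i8, Base + 2.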
+struct LoadedSlice {
+ /// Helper structure used to compute the cost of a slice.
+ struct Cost {
+ /// Are we optimizing for code size.
+ bool ForCodeSize = false;
+
+    /// Various costs.
+ unsigned Loads = 0;
+ unsigned Truncates = 0;
+ unsigned CrossRegisterBanksCopies = 0;
+ unsigned ZExts = 0;
+ unsigned Shift = 0;
+
+ explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
+
+ /// Get the cost of one isolated slice.
+ Cost(const LoadedSlice &LS, bool ForCodeSize)
+ : ForCodeSize(ForCodeSize), Loads(1) {
+ EVT TruncType = LS.Inst->getValueType(0);
+ EVT LoadedType = LS.getLoadedType();
+ if (TruncType != LoadedType &&
+ !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
+ ZExts = 1;
+ }
+
+ /// Account for slicing gain in the current cost.
+    /// Slicing provides a few gains, like removing a shift or a
+    /// truncate. This method grows the cost of the original
+    /// load by the gain from this slice.
+ void addSliceGain(const LoadedSlice &LS) {
+ // Each slice saves a truncate.
+ const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
+ if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
+ LS.Inst->getValueType(0)))
+ ++Truncates;
+ // If there is a shift amount, this slice gets rid of it.
+ if (LS.Shift)
+ ++Shift;
+ // If this slice can merge a cross register bank copy, account for it.
+ if (LS.canMergeExpensiveCrossRegisterBankCopy())
+ ++CrossRegisterBanksCopies;
+ }
+
+ Cost &operator+=(const Cost &RHS) {
+ Loads += RHS.Loads;
+ Truncates += RHS.Truncates;
+ CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
+ ZExts += RHS.ZExts;
+ Shift += RHS.Shift;
+ return *this;
+ }
+
+ bool operator==(const Cost &RHS) const {
+ return Loads == RHS.Loads && Truncates == RHS.Truncates &&
+ CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
+ ZExts == RHS.ZExts && Shift == RHS.Shift;
+ }
+
+ bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
+
+ bool operator<(const Cost &RHS) const {
+ // Assume cross register banks copies are as expensive as loads.
+ // FIXME: Do we want some more target hooks?
+ unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
+ unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
+ // Unless we are optimizing for code size, consider the
+ // expensive operation first.
+ if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
+ return ExpensiveOpsLHS < ExpensiveOpsRHS;
+ return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
+ (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
+ }
+
+ bool operator>(const Cost &RHS) const { return RHS < *this; }
+
+ bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
+
+ bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
+ };
+
+ // The last instruction that represents the slice. This should be a
+ // truncate instruction.
+ SDNode *Inst;
+
+ // The original load instruction.
+ LoadSDNode *Origin;
+
+ // The right shift amount in bits from the original load.
+ unsigned Shift;
+
+ // The DAG from which Origin comes.
+ // This is used to get some contextual information about legal types, etc.
+ SelectionDAG *DAG;
+
+ LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
+ unsigned Shift = 0, SelectionDAG *DAG = nullptr)
+ : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
+
+ /// Get the bits used in a chunk of bits \p BitWidth large.
+ /// \return Result is \p BitWidth bits wide and has used bits set to 1
+ /// and unused bits set to 0.
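+ /// For example, an i8 slice of an i32 load with Shift == 16 yields
+ /// UsedBits == 0x00FF0000.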
+ APInt getUsedBits() const {
+ // Reproduce the trunc(lshr) sequence:
+ // - Start from the truncated value.
+ // - Zero extend to the desired bit width.
+ // - Shift left.
+ assert(Origin && "No original load to compare against.");
+ unsigned BitWidth = Origin->getValueSizeInBits(0);
+ assert(Inst && "This slice is not bound to an instruction");
+ assert(Inst->getValueSizeInBits(0) <= BitWidth &&
+ "Extracted slice is bigger than the whole type!");
+ APInt UsedBits(Inst->getValueSizeInBits(0), 0);
+ UsedBits.setAllBits();
+ UsedBits = UsedBits.zext(BitWidth);
+ UsedBits <<= Shift;
+ return UsedBits;
+ }
+
+ /// Get the size of the slice to be loaded in bytes.
+ unsigned getLoadedSize() const {
+ unsigned SliceSize = getUsedBits().countPopulation();
+ assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
+ return SliceSize / 8;
+ }
+
+ /// Get the type that will be loaded for this slice.
+ /// Note: This may not be the final type for the slice.
+ EVT getLoadedType() const {
+ assert(DAG && "Missing context");
+ LLVMContext &Ctxt = *DAG->getContext();
+ return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
+ }
+
+ /// Get the alignment of the load used for this slice.
+ unsigned getAlignment() const {
+ unsigned Alignment = Origin->getAlignment();
+ uint64_t Offset = getOffsetFromBase();
+ if (Offset != 0)
+ Alignment = MinAlign(Alignment, Alignment + Offset);
+ return Alignment;
+ }
+
+ /// Check if this slice can be rewritten with legal operations.
+ bool isLegal() const {
+ // An invalid slice is not legal.
+ if (!Origin || !Inst || !DAG)
+ return false;
+
+ // Offsets are only present for indexed loads; we do not handle those.
+ if (!Origin->getOffset().isUndef())
+ return false;
+
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+
+ // Check that the type is legal.
+ EVT SliceType = getLoadedType();
+ if (!TLI.isTypeLegal(SliceType))
+ return false;
+
+ // Check that the load is legal for this type.
+ if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
+ return false;
+
+ // Check that the offset can be computed.
+ // 1. Check its type.
+ EVT PtrType = Origin->getBasePtr().getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ return false;
+
+ // 2. Check that it fits in the immediate.
+ if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
+ return false;
+
+ // 3. Check that the computation is legal.
+ if (!TLI.isOperationLegal(ISD::ADD, PtrType))
+ return false;
+
+ // Check that the zext is legal if it needs one.
+ EVT TruncateType = Inst->getValueType(0);
+ if (TruncateType != SliceType &&
+ !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
+ return false;
+
+ return true;
+ }
+
+ /// Get the offset in bytes of this slice in the original chunk of
+ /// bits.
+ /// \pre DAG != nullptr.
+ uint64_t getOffsetFromBase() const {
+ assert(DAG && "Missing context.");
+ bool IsBigEndian = DAG->getDataLayout().isBigEndian();
+ assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
+ uint64_t Offset = Shift / 8;
+ unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
+ assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
+ "The size of the original loaded type is not a multiple of a"
+ " byte.");
+ // If Offset is bigger than TySizeInBytes, it means we are loading all
+ // zeros. This should have been optimized before in the process.
+ assert(TySizeInBytes > Offset &&
+ "Invalid shift amount for given loaded size");
+ if (IsBigEndian)
+ Offset = TySizeInBytes - Offset - getLoadedSize();
+ return Offset;
+ }
+
+ /// Generate the sequence of instructions to load the slice
+ /// represented by this object and redirect the uses of this slice to
+ /// this new sequence of instructions.
+ /// \pre this->Inst && this->Origin are valid Instructions and this
+ /// object passed the legal check: LoadedSlice::isLegal returned true.
+ /// \return The last instruction of the sequence used to load the slice.
+ SDValue loadSlice() const {
+ assert(Inst && Origin && "Unable to replace a non-existing slice.");
+ const SDValue &OldBaseAddr = Origin->getBasePtr();
+ SDValue BaseAddr = OldBaseAddr;
+ // Get the offset in that chunk of bytes w.r.t. the endianness.
+ int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
+ assert(Offset >= 0 && "Offset too big to fit in int64_t!");
+ if (Offset) {
+ // BaseAddr = BaseAddr + Offset.
+ EVT ArithType = BaseAddr.getValueType();
+ SDLoc DL(Origin);
+ BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
+ DAG->getConstant(Offset, DL, ArithType));
+ }
+
+ // Create the type of the loaded slice according to its size.
+ EVT SliceType = getLoadedType();
+
+ // Create the load for the slice.
+ SDValue LastInst =
+ DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
+ Origin->getPointerInfo().getWithOffset(Offset),
+ getAlignment(), Origin->getMemOperand()->getFlags());
+ // If the final type is not the same as the loaded type, this means that
+ // we have to pad with zero. Create a zero extend for that.
+ EVT FinalType = Inst->getValueType(0);
+ if (SliceType != FinalType)
+ LastInst =
+ DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
+ return LastInst;
+ }
+
+ /// Check if this slice can be merged with an expensive cross register
+ /// bank copy. E.g.,
+ /// i = load i32
+ /// f = bitcast i32 i to float
+ bool canMergeExpensiveCrossRegisterBankCopy() const {
+ if (!Inst || !Inst->hasOneUse())
+ return false;
+ SDNode *Use = *Inst->use_begin();
+ if (Use->getOpcode() != ISD::BITCAST)
+ return false;
+ assert(DAG && "Missing context");
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ EVT ResVT = Use->getValueType(0);
+ const TargetRegisterClass *ResRC =
+ TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
+ const TargetRegisterClass *ArgRC =
+ TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
+ Use->getOperand(0)->isDivergent());
+ if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // At this point, we know that we perform a cross-register-bank copy.
+ // Check if it is expensive.
+ const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
+ // Assume bitcasts are cheap, unless the two register classes do not
+ // explicitly share a common subclass.
+ if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
+ return false;
+
+ // Check if it will be merged with the load.
+ // 1. Check the alignment constraint.
+ unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
+ ResVT.getTypeForEVT(*DAG->getContext()));
+
+ if (RequiredAlignment > getAlignment())
+ return false;
+
+ // 2. Check that the load is a legal operation for that type.
+ if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // 3. Check that we do not have a zext in the way.
+ if (Inst->getValueType(0) != getLoadedType())
+ return false;
+
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+/// Check that all bits set in \p UsedBits form a dense region, i.e.,
+/// \p UsedBits looks like 0..0 1..1 0..0.
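+/// For example, 0x00FF00 is dense, while 0x00F0F0 is not.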
+static bool areUsedBitsDense(const APInt &UsedBits) {
+ // If all the bits are one, this is dense!
+ if (UsedBits.isAllOnesValue())
+ return true;
+
+ // Get rid of the unused bits on the right.
+ APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
+ // Get rid of the unused bits on the left.
+ if (NarrowedUsedBits.countLeadingZeros())
+ NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
+ // Check that the chunk of bits is completely used.
+ return NarrowedUsedBits.isAllOnesValue();
+}
+
+/// Check whether or not \p First and \p Second are next to each other
+/// in memory. This means that there is no hole between the bits loaded
+/// by \p First and the bits loaded by \p Second.
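+/// For example, slices covering bits [0, 16) and [16, 32) are next to each
+/// other, while slices covering [0, 8) and [16, 24) leave a hole.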
+static bool areSlicesNextToEachOther(const LoadedSlice &First,
+ const LoadedSlice &Second) {
+ assert(First.Origin == Second.Origin && First.Origin &&
+ "Unable to match different memory origins.");
+ APInt UsedBits = First.getUsedBits();
+ assert((UsedBits & Second.getUsedBits()) == 0 &&
+ "Slices are not supposed to overlap.");
+ UsedBits |= Second.getUsedBits();
+ return areUsedBitsDense(UsedBits);
+}
+
+/// Adjust the \p GlobalLSCost according to the target
+/// pairing capabilities and the layout of the slices.
+/// \pre \p GlobalLSCost should account for at least as many loads as
+/// there are slices in \p LoadedSlices.
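+/// For example, if the target can pair two adjacent i16 loads, each pair of
+/// slices that sit next to each other in memory costs one load instead of
+/// two.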
+static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ LoadedSlice::Cost &GlobalLSCost) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+ // If there are fewer than 2 elements, no pairing is possible.
+ if (NumberOfSlices < 2)
+ return;
+
+ // Sort the slices so that elements that are likely to be next to each
+ // other in memory are next to each other in the list.
+ llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+ return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+ });
+ const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
+ // First (resp. Second) is the first (resp. second) potential candidate
+ // to be placed in a paired load.
+ const LoadedSlice *First = nullptr;
+ const LoadedSlice *Second = nullptr;
+ for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
+ // Set the beginning of the pair.
+ First = Second) {
+ Second = &LoadedSlices[CurrSlice];
+
+ // If First is NULL, it means we start a new pair.
+ // Get to the next slice.
+ if (!First)
+ continue;
+
+ EVT LoadedType = First->getLoadedType();
+
+ // If the types of the slices are different, we cannot pair them.
+ if (LoadedType != Second->getLoadedType())
+ continue;
+
+ // Check if the target supplies paired loads for this type.
+ unsigned RequiredAlignment = 0;
+ if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
+ // Move to the next pair; this type is hopeless.
+ Second = nullptr;
+ continue;
+ }
+ // Check if we meet the alignment requirement.
+ if (RequiredAlignment > First->getAlignment())
+ continue;
+
+ // Check that both loads are next to each other in memory.
+ if (!areSlicesNextToEachOther(*First, *Second))
+ continue;
+
+ assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
+ --GlobalLSCost.Loads;
+ // Move to the next pair.
+ Second = nullptr;
+ }
+}
+
+/// Check the profitability of all involved LoadedSlices.
+/// Currently, slicing is considered profitable if there are exactly two
+/// involved slices (1) which are (2) next to each other in memory, and
+/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
+///
+/// Note: The order of the elements in \p LoadedSlices may be modified, but not
+/// the elements themselves.
+///
+/// FIXME: When the cost model is mature enough, we can relax
+/// constraints (1) and (2).
+static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ const APInt &UsedBits, bool ForCodeSize) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+ if (StressLoadSlicing)
+ return NumberOfSlices > 1;
+
+ // Check (1).
+ if (NumberOfSlices != 2)
+ return false;
+
+ // Check (2).
+ if (!areUsedBitsDense(UsedBits))
+ return false;
+
+ // Check (3).
+ LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
+ // The original code has one big load.
+ OrigCost.Loads = 1;
+ for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
+ const LoadedSlice &LS = LoadedSlices[CurrSlice];
+ // Accumulate the cost of all the slices.
+ LoadedSlice::Cost SliceCost(LS, ForCodeSize);
+ GlobalSlicingCost += SliceCost;
+
+ // Account as cost in the original configuration the gain obtained
+ // with the current slices.
+ OrigCost.addSliceGain(LS);
+ }
+
+ // If the target supports paired load, adjust the cost accordingly.
+ adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
+ return OrigCost > GlobalSlicingCost;
+}
+
+/// If the given load, \p N, is used only by trunc or trunc(lshr)
+/// operations, split it in the various pieces being extracted.
+///
+/// This sort of thing is introduced by SROA.
+/// This slicing takes care not to insert overlapping loads.
+/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
+bool DAGCombiner::SliceUpLoad(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
+ !LD->getValueType(0).isInteger())
+ return false;
+
+ // Keep track of already used bits to detect overlapping values.
+ // In that case, we will just abort the transformation.
+ APInt UsedBits(LD->getValueSizeInBits(0), 0);
+
+ SmallVector<LoadedSlice, 4> LoadedSlices;
+
+ // Check if this load is used as several smaller chunks of bits.
+ // Basically, look for uses in trunc or trunc(lshr) and record a new chain
+ // of computation for each trunc.
+ for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+ UI != UIEnd; ++UI) {
+ // Skip the uses of the chain.
+ if (UI.getUse().getResNo() != 0)
+ continue;
+
+ SDNode *User = *UI;
+ unsigned Shift = 0;
+
+ // Check if this is a trunc(lshr).
+ if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
+ isa<ConstantSDNode>(User->getOperand(1))) {
+ Shift = User->getConstantOperandVal(1);
+ User = *User->use_begin();
+ }
+
+ // At this point, User is a truncate iff we encountered trunc or
+ // trunc(lshr).
+ if (User->getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ // The width of the type must be a power of 2 and at least 8 bits.
+ // Otherwise the load cannot be represented in LLVM IR.
+ // Moreover, if the shift amount is not a multiple of 8 bits, the slice
+ // would not start on a byte boundary. We do not support that.
+ unsigned Width = User->getValueSizeInBits(0);
+ if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
+ return false;
+
+ // Build the slice for this chain of computations.
+ LoadedSlice LS(User, LD, Shift, &DAG);
+ APInt CurrentUsedBits = LS.getUsedBits();
+
+ // Check if this slice overlaps with another.
+ if ((CurrentUsedBits & UsedBits) != 0)
+ return false;
+ // Update the bits used globally.
+ UsedBits |= CurrentUsedBits;
+
+ // Check if the new slice would be legal.
+ if (!LS.isLegal())
+ return false;
+
+ // Record the slice.
+ LoadedSlices.push_back(LS);
+ }
+
+ // Abort slicing if it does not seem to be profitable.
+ if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
+ return false;
+
+ ++SlicedLoads;
+
+ // Rewrite each chain to use an independent load.
+ // By construction, each chain can be represented by a unique load.
+
+ // Prepare the argument for the new token factor for all the slices.
+ SmallVector<SDValue, 8> ArgChains;
+ for (SmallVectorImpl<LoadedSlice>::const_iterator
+ LSIt = LoadedSlices.begin(),
+ LSItEnd = LoadedSlices.end();
+ LSIt != LSItEnd; ++LSIt) {
+ SDValue SliceInst = LSIt->loadSlice();
+ CombineTo(LSIt->Inst, SliceInst, true);
+ if (SliceInst.getOpcode() != ISD::LOAD)
+ SliceInst = SliceInst.getOperand(0);
+ assert(SliceInst->getOpcode() == ISD::LOAD &&
+ "It takes more than a zext to get to the loaded slice!!");
+ ArgChains.push_back(SliceInst.getValue(1));
+ }
+
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
+ ArgChains);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ AddToWorklist(Chain.getNode());
+ return true;
+}
+
+/// Check to see if V is (and (load ptr), imm), where the load has
+/// specific bytes cleared out. If so, return the number of bytes being
+/// masked out and the byte shift of the masked region.
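+/// For example, for an i32 value, (and (load Ptr), 0xFFFF00FF) clears byte 1
+/// and would yield {1, 1} (one byte masked out, at byte offset 1), assuming
+/// the chain check below also succeeds.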
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+ std::pair<unsigned, unsigned> Result(0, 0);
+
+ // Check for the structure we're looking for.
+ if (V->getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(V->getOperand(1)) ||
+ !ISD::isNormalLoad(V->getOperand(0).getNode()))
+ return Result;
+
+ // Check the chain and pointer.
+ LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+ if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
+
+ // This only handles simple types.
+ if (V.getValueType() != MVT::i16 &&
+ V.getValueType() != MVT::i32 &&
+ V.getValueType() != MVT::i64)
+ return Result;
+
+ // Check the constant mask. Invert it so that the bits being masked out are
+ // 1 and the bits being kept are 0. Use getSExtValue so that leading bits
+ // follow the sign bit for uniformity.
+ uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+ unsigned NotMaskLZ = countLeadingZeros(NotMask);
+ if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
+ unsigned NotMaskTZ = countTrailingZeros(NotMask);
+ if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
+ if (NotMaskLZ == 64) return Result; // All zero mask.
+
+ // See if we have a continuous run of bits. If so, we have 0*1+0*
+ if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
+ return Result;
+
+ // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+ if (V.getValueType() != MVT::i64 && NotMaskLZ)
+ NotMaskLZ -= 64-V.getValueSizeInBits();
+
+ unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+ switch (MaskedBytes) {
+ case 1:
+ case 2:
+ case 4: break;
+ default: return Result; // All-one mask, or an unsupported (e.g. 3- or 5-byte) mask.
+ }
+
+ // Verify that the masked region starts at a multiple of its own width so
+ // that the access is aligned the same as the access width.
+ if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+ // For narrowing to be valid, the load must be the memory operation
+ // immediately preceding the store.
+ if (LD == Chain.getNode())
+ ; // ok.
+ else if (Chain->getOpcode() == ISD::TokenFactor &&
+ SDValue(LD, 1).hasOneUse()) {
+ // LD has only 1 chain use, so there are no indirect dependencies.
+ if (!LD->isOperandOf(Chain.getNode()))
+ return Result;
+ } else
+ return Result; // Fail.
+
+ Result.first = MaskedBytes;
+ Result.second = NotMaskTZ/8;
+ return Result;
+}
+
+/// Check to see if IVal is something that provides a value as specified by
+/// MaskInfo. If so, replace the specified store with a narrower store of
+/// truncated IVal.
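+/// For example, with MaskInfo == {1, 1} on a little-endian target, an i32
+/// store becomes an i8 store of (trunc (srl IVal, 8)) at Ptr + 1; on
+/// big-endian targets the offset is computed from the other end.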
+static SDValue
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+ SDValue IVal, StoreSDNode *St,
+ DAGCombiner *DC) {
+ unsigned NumBytes = MaskInfo.first;
+ unsigned ByteShift = MaskInfo.second;
+ SelectionDAG &DAG = DC->getDAG();
+
+ // Check to see if IVal is all zeros in the part being masked in by the 'or'
+ // that uses this. If not, this is not a replacement.
+ APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+ ByteShift*8, (ByteShift+NumBytes)*8);
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
+
+ // Check that it is legal on the target to do this. It is legal if the new
+ // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+ // legalization (and the target doesn't explicitly think this is a bad idea).
+ MVT VT = MVT::getIntegerVT(NumBytes * 8);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!DC->isTypeLegal(VT))
+ return SDValue();
+ if (St->getMemOperand() &&
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *St->getMemOperand()))
+ return SDValue();
+
+ // Okay, we can do this! Replace the 'St' store with a store of IVal that is
+ // shifted by ByteShift and truncated down to NumBytes.
+ if (ByteShift) {
+ SDLoc DL(IVal);
+ IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8, DL,
+ DC->getShiftAmountTy(IVal.getValueType())));
+ }
+
+ // Figure out the offset for the store and the alignment of the access.
+ unsigned StOffset;
+ unsigned NewAlign = St->getAlignment();
+
+ if (DAG.getDataLayout().isLittleEndian())
+ StOffset = ByteShift;
+ else
+ StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+ SDValue Ptr = St->getBasePtr();
+ if (StOffset) {
+ SDLoc DL(IVal);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
+ NewAlign = MinAlign(NewAlign, StOffset);
+ }
+
+ // Truncate down to the new size.
+ IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
+
+ ++OpsNarrowed;
+ return DAG
+ .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset), NewAlign);
+}
+
+/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
+/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
+/// narrowing the load and store if it would end up being a win for performance
+/// or code size.
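+/// For example, (store (or (and (load p), 0xFFFFFF00), X), p), where X is
+/// known to supply only byte 0, can become a single i8 store of (trunc X)
+/// to p on a little-endian target.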
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (!ST->isSimple())
+ return SDValue();
+
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ EVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Value.getOpcode();
+
+ // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+ // is a byte mask indicating a consecutive number of bytes, check to see if
+ // Y is known to provide just those bytes. If so, we try to replace the
+ // load + mask + or + store sequence with a single (narrower) store, which
+ // makes the load dead.
+ if (Opc == ISD::OR) {
+ std::pair<unsigned, unsigned> MaskedLoad;
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(1), ST,this))
+ return NewST;
+
+ // Or is commutative, so try swapping X and Y.
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(0), ST,this))
+ return NewST;
+ }
+
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ Chain == SDValue(N0.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
+ return SDValue();
+
+ // Find the type to narrow the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ if (Imm == 0 || Imm.isAllOnesValue())
+ return SDValue();
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ // The narrowing should be profitable, the load/store operation should be
+ // legal (or custom) and the store size should be equal to the NewVT width.
+ while (NewBW < BitWidth &&
+ (NewVT.getStoreSizeInBits() != NewBW ||
+ !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
+ !TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ }
+ if (NewBW >= BitWidth)
+ return SDValue();
+
+ // If the lowest changed bit does not fall on a NewBW-sized boundary,
+ // start at the previous one.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
+ std::min(BitWidth, ShAmt + NewBW));
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (DAG.getDataLayout().isBigEndian())
+ PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+ if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
+ return SDValue();
+
+ SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, SDLoc(LD),
+ Ptr.getValueType()));
+ SDValue NewLD =
+ DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
+ LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
+ DAG.getConstant(NewImm, SDLoc(Value),
+ NewVT));
+ SDValue NewST =
+ DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
+ ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
+
+ AddToWorklist(NewPtr.getNode());
+ AddToWorklist(NewLD.getNode());
+ AddToWorklist(NewVal.getNode());
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue();
+}
+
+/// For a given floating point load / store pair, if the load value isn't used
+/// by any other operations, then consider transforming the pair to integer
+/// load / store operations if the target deems the transformation profitable.
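+/// For example, (store f64 (load f64 p1), p2) may become
+/// (store i64 (load i64 p1), p2) when the target considers the integer form
+/// cheaper, e.g. to avoid moving values through FP registers.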
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse()) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD =
+ DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(), LDAlign);
+
+ SDValue NewST =
+ DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(), STAlign);
+
+ AddToWorklist(NewLD.getNode());
+ AddToWorklist(NewST.getNode());
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
+// This is a helper function for visitMUL to check the profitability
+// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+// MulNode is the original multiply, AddNode is (add x, c1),
+// and ConstNode is c2.
+//
+// If the (add x, c1) has multiple uses, we could increase
+// the number of adds if we make this transformation.
+// It would only be worth doing this if we can remove a
+// multiply in the process. Check for that here.
+// To illustrate:
+// (A + c1) * c3
+// (A + c2) * c3
+// We're checking for cases where we have common "c3 * A" expressions.
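+// For example, rewriting (A + 3) * 5 as (A * 5) + 15 is only worthwhile if
+// some other expression computes A * 5 as well (or will after the same
+// rewrite), so that the multiply can be shared.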
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode) {
+ APInt Val;
+
+ // If the add only has one use, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse())
+ return true;
+
+ // Walk all the users of the constant with which we're multiplying.
+ for (SDNode *Use : ConstNode->uses()) {
+ if (Use == MulNode) // This use is the one we're on right now. Skip it.
+ continue;
+
+ if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
+ SDNode *OtherOp;
+ SDNode *MulVar = AddNode.getOperand(0).getNode();
+
+ // OtherOp is what we're multiplying against the constant.
+ if (Use->getOperand(0) == ConstNode)
+ OtherOp = Use->getOperand(1).getNode();
+ else
+ OtherOp = Use->getOperand(0).getNode();
+
+ // Check to see if multiply is with the same operand of our "add".
+ //
+ // ConstNode = CONST
+ // Use = ConstNode * A <-- visiting Use. OtherOp is A.
+ // ...
+ // AddNode = (A + c1) <-- MulVar is A.
+ // = AddNode * ConstNode <-- current visiting instruction.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (ConstNode * A) that we can save.
+ if (OtherOp == MulVar)
+ return true;
+
+ // Now check to see if a future expansion will give us a common
+ // multiply.
+ //
+ // ConstNode = CONST
+ // AddNode = (A + c1)
+ // ... = AddNode * ConstNode <-- current visiting instruction.
+ // ...
+ // OtherOp = (A + c2)
+ // Use = OtherOp * ConstNode <-- visiting Use.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (CONST * A) after we also do the same transformation
+ // to the "t2" instruction.
+ if (OtherOp->getOpcode() == ISD::ADD &&
+ DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
+ OtherOp->getOperand(0).getNode() == MulVar)
+ return true;
+ }
+ }
+
+ // Didn't find a case where this would be profitable.
+ return false;
+}
+
+SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores) {
+ SmallVector<SDValue, 8> Chains;
+ SmallPtrSet<const SDNode *, 8> Visited;
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ for (unsigned i = 0; i < NumStores; ++i) {
+ Visited.insert(StoreNodes[i].MemNode);
+ }
+
+ // Don't include nodes that are children or repeated nodes.
+ for (unsigned i = 0; i < NumStores; ++i) {
+ if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
+ Chains.push_back(StoreNodes[i].MemNode->getChain());
+ }
+
+ assert(Chains.size() > 0 && "Chain should have generated a chain");
+ return DAG.getTokenFactor(StoreDL, Chains);
+}
+
+bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector, bool UseTrunc) {
+ // Make sure we have something to merge.
+ if (NumStores < 2)
+ return false;
+
+ // Take the debug location from the first store in the group.
+ SDLoc DL(StoreNodes[0].MemNode);
+
+ int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
+ unsigned SizeInBits = NumStores * ElementSizeBits;
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+
+ EVT StoreTy;
+ if (UseVector) {
+ unsigned Elts = NumStores * NumMemElts;
+ // Get the type for the merged vector store.
+ StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ } else
+ StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+
+ SDValue StoredVal;
+ if (UseVector) {
+ if (IsConstantSrc) {
+ SmallVector<SDValue, 8> BuildVector;
+ for (unsigned I = 0; I != NumStores; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ SDValue Val = St->getValue();
+ // If constant is of the wrong type, convert it now.
+ if (MemVT != Val.getValueType()) {
+ Val = peekThroughBitcasts(Val);
+ // Deal with constants of wrong size.
+ if (ElementSizeBits != Val.getValueSizeInBits()) {
+ EVT IntMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ if (isa<ConstantFPSDNode>(Val)) {
+ // Not clear how to truncate FP values.
+ return false;
+ } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
+ Val = DAG.getConstant(C->getAPIntValue()
+ .zextOrTrunc(Val.getValueSizeInBits())
+ .zextOrTrunc(ElementSizeBits),
+ SDLoc(C), IntMemVT);
+ }
+ // Bitcast the correctly sized value to the expected memory type.
+ Val = DAG.getBitcast(MemVT, Val);
+ }
+ BuildVector.push_back(Val);
+ }
+ StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, StoreTy, BuildVector);
+ } else {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = peekThroughBitcasts(St->getValue());
+ // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
+ // type MemVT. If the underlying value is not the correct
+ // type, but it is an extraction of an appropriate vector we
+ // can recast Val to be of the correct type. This may require
+ // converting between EXTRACT_VECTOR_ELT and
+ // EXTRACT_SUBVECTOR.
+ if ((MemVT != Val.getValueType()) &&
+ (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
+ EVT MemVTScalarTy = MemVT.getScalarType();
+ // We may need to add a bitcast here to get types to line up.
+ if (MemVTScalarTy != Val.getValueType().getScalarType()) {
+ Val = DAG.getBitcast(MemVT, Val);
+ } else {
+ unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
+ : ISD::EXTRACT_VECTOR_ELT;
+ SDValue Vec = Val.getOperand(0);
+ SDValue Idx = Val.getOperand(1);
+ Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
+ }
+ }
+ Ops.push_back(Val);
+ }
+
+ // Build the extracted vector elements back into a vector.
+ StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, StoreTy, Ops);
+ }
+ } else {
+ // We should always use a vector store when merging extracted vector
+ // elements, so this path implies a store of constants.
+ assert(IsConstantSrc && "Merged vector elements should use vector store");
+
+ APInt StoreInt(SizeInBits, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ for (unsigned i = 0; i < NumStores; ++i) {
+ unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+
+ SDValue Val = St->getValue();
+ Val = peekThroughBitcasts(Val);
+ StoreInt <<= ElementSizeBits;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+ StoreInt |= C->getAPIntValue()
+ .zextOrTrunc(ElementSizeBits)
+ .zextOrTrunc(SizeInBits);
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+ StoreInt |= C->getValueAPF()
+ .bitcastToAPInt()
+ .zextOrTrunc(ElementSizeBits)
+ .zextOrTrunc(SizeInBits);
+ // If FP truncation is necessary, give up for now.
+ if (MemVT.getSizeInBits() != ElementSizeBits)
+ return false;
+ } else {
+ llvm_unreachable("Invalid constant element type");
+ }
+ }
+
+ // Create the new Load and Store operations.
+ StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
+ }
+
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
+
+ // Make sure we use a truncating store if that is necessary for legality.
+ SDValue NewStore;
+ if (!UseTrunc) {
+ NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ FirstInChain->getAlignment());
+ } else { // Must be realized as a trunc store
+ EVT LegalizedStoredValTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
+ ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
+ SDValue ExtendedStoreVal =
+ DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
+ LegalizedStoredValTy);
+ NewStore = DAG.getTruncStore(
+ NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
+ FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
+
+ // Replace all merged stores with the new store.
+ for (unsigned i = 0; i < NumStores; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+
+ AddToWorklist(NewChain.getNode());
+ return true;
+}
+
+void DAGCombiner::getStoreMergeCandidates(
+ StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
+ SDNode *&RootNode) {
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+ EVT MemVT = St->getMemoryVT();
+
+ SDValue Val = peekThroughBitcasts(St->getValue());
+ // We must have a base and an offset.
+ if (!BasePtr.getBase().getNode())
+ return;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.getBase().isUndef())
+ return;
+
+ bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
+ bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ bool IsLoadSrc = isa<LoadSDNode>(Val);
+ BaseIndexOffset LBasePtr;
+ // Match on loadbaseptr if relevant.
+ EVT LoadVT;
+ if (IsLoadSrc) {
+ auto *Ld = cast<LoadSDNode>(Val);
+ LBasePtr = BaseIndexOffset::match(Ld, DAG);
+ LoadVT = Ld->getMemoryVT();
+ // Load and store should be the same type.
+ if (MemVT != LoadVT)
+ return;
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
+ return;
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!Ld->isSimple() || Ld->isIndexed())
+ return;
+ }
+ auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
+ int64_t &Offset) -> bool {
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!Other->isSimple() || Other->isIndexed())
+ return false;
+ // Don't mix temporal stores with non-temporal stores.
+ if (St->isNonTemporal() != Other->isNonTemporal())
+ return false;
+ SDValue OtherBC = peekThroughBitcasts(Other->getValue());
+ // Allow merging constants of different types as integers.
+ bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
+ : Other->getMemoryVT() != MemVT;
+ if (IsLoadSrc) {
+ if (NoTypeMatch)
+ return false;
+ // The Load's Base Ptr must also match
+ if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
+ BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
+ if (LoadVT != OtherLd->getMemoryVT())
+ return false;
+ // Loads must only have one use.
+ if (!OtherLd->hasNUsesOfValue(1, 0))
+ return false;
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!OtherLd->isSimple() ||
+ OtherLd->isIndexed())
+ return false;
+ // Don't mix temporal loads with non-temporal loads.
+ if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
+ return false;
+ if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
+ return false;
+ } else
+ return false;
+ }
+ if (IsConstantSrc) {
+ if (NoTypeMatch)
+ return false;
+ if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
+ return false;
+ }
+ if (IsExtractVecSrc) {
+ // Do not merge truncated stores here.
+ if (Other->isTruncatingStore())
+ return false;
+ if (!MemVT.bitsEq(OtherBC.getValueType()))
+ return false;
+ if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+ }
+ Ptr = BaseIndexOffset::match(Other, DAG);
+ return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
+ };
+
+ // Check if the pair of StoreNode and RootNode has already bailed out of
+ // the dependence check more times than the limit allows.
+ auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
+ SDNode *RootNode) -> bool {
+ auto RootCount = StoreRootCountMap.find(StoreNode);
+ if (RootCount != StoreRootCountMap.end() &&
+ RootCount->second.first == RootNode &&
+ RootCount->second.second > StoreMergeDependenceLimit)
+ return true;
+ return false;
+ };
+
+ // We are looking for a root node which is an ancestor to all mergeable
+ // stores. We search up through a load, to our root and then down
+ // through all children. For instance we will find Store{1,2,3} if
+ // St is Store1, Store2, or Store3 where the root is not a load,
+ // which is always true for nonvolatile ops. TODO: Expand
+ // the search to find all valid candidates through multiple layers of loads.
+ //
+ // Root
+ // |-------|-------|
+ // Load Load Store3
+ // | |
+ // Store1 Store2
+ //
+ // FIXME: We should be able to climb and
+ // descend TokenFactors to find candidates as well.
+
+ RootNode = St->getChain().getNode();
+
+ unsigned NumNodesExplored = 0;
+ if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
+ RootNode = Ldn->getChain().getNode();
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+ I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
+ if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
+ for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
+ if (I2.getOperandNo() == 0)
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
+ BaseIndexOffset Ptr;
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherST, RootNode))
+ StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
+ }
+ } else
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+ I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
+ if (I.getOperandNo() == 0)
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
+ BaseIndexOffset Ptr;
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherST, RootNode))
+ StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
+ }
+}
+
+// We need to check that merging these stores does not cause a loop in
+// the DAG. Any store candidate may depend on another candidate
+// indirectly through its operand (we already consider dependencies
+// through the chain). Check in parallel by searching up from
+// non-chain operands of candidates.
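+// For example, if the value stored by one candidate is computed from a load
+// whose address depends on another candidate's store, merging them would
+// create a cycle.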
+bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
+ SDNode *RootNode) {
+ // FIXME: We should be able to truncate a full search of
+ // predecessors by doing a BFS and keeping tabs on the originating
+ // stores from which worklist nodes come in a similar way to
+ // TokenFactor simplification.
+
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+
+ // RootNode is a predecessor to all candidates so we need not search
+ // past it. Add RootNode (peeking through TokenFactors). Do not count
+ // these towards size check.
+
+ Worklist.push_back(RootNode);
+ while (!Worklist.empty()) {
+ auto N = Worklist.pop_back_val();
+ if (!Visited.insert(N).second)
+ continue; // Already present in Visited.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (SDValue Op : N->ops())
+ Worklist.push_back(Op.getNode());
+ }
+ }
+
+ // Don't count pruning nodes towards max.
+ unsigned int Max = 1024 + Visited.size();
+ // Search Ops of store candidates.
+ for (unsigned i = 0; i < NumStores; ++i) {
+ SDNode *N = StoreNodes[i].MemNode;
+ // Of the 4 Store Operands:
+ // * Chain (Op 0) -> We have already considered these
+ // in candidate selection and can be
+ // safely ignored
+ // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
+ // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
+ // but aren't necessarily from the same base node, so
+ // cycles are possible (e.g. via indexed store).
+ // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
+ // non-indexed stores). Not constant on all targets (e.g. ARM)
+ // and so can participate in a cycle.
+ for (unsigned j = 1; j < N->getNumOperands(); ++j)
+ Worklist.push_back(N->getOperand(j).getNode());
+ }
+ // Search through DAG. We can stop early if we find a store node.
+ for (unsigned i = 0; i < NumStores; ++i)
+ if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
+ Max)) {
+ // If the search bails out, record the StoreNode and RootNode in the
+ // StoreRootCountMap. If we have seen the pair more times than the limit,
+ // we won't add the StoreNode into the StoreNodes set again.
+ if (Visited.size() >= Max) {
+ auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
+ if (RootCount.first == RootNode)
+ RootCount.second++;
+ else
+ RootCount = {RootNode, 1};
+ }
+ return false;
+ }
+ return true;
+}
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
+ return false;
+
+ EVT MemVT = St->getMemoryVT();
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+
+ if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
+ return false;
+
+ bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+
+ // This function cannot currently deal with non-byte-sized memory sizes.
+ if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+ return false;
+
+ if (!MemVT.isSimple())
+ return false;
+
+ // Perform an early exit check. Do not bother looking at stored values that
+ // are not constants, loads, or extracted vector elements.
+ SDValue StoredVal = peekThroughBitcasts(St->getValue());
+ bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+ bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+ isa<ConstantFPSDNode>(StoredVal);
+ bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ bool IsNonTemporalStore = St->isNonTemporal();
+ bool IsNonTemporalLoad =
+ IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
+
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
+ return false;
+
+ SmallVector<MemOpLink, 8> StoreNodes;
+ SDNode *RootNode;
+ // Find potential store merge candidates by searching through chain sub-DAG
+ getStoreMergeCandidates(St, StoreNodes, RootNode);
+
+ // Check if there is anything to merge.
+ if (StoreNodes.size() < 2)
+ return false;
+
+ // Sort the memory operands according to their distance from the
+ // base pointer.
+ llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
+
+ // Store Merge attempts to merge the lowest stores. This generally
+ // works out because, if successful, the remaining stores are checked
+ // after the first collection of stores is merged. However, in the
+ // case that a non-mergeable store is found first, e.g., {p[-2],
+ // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
+ // mergeable cases. To prevent this, we prune such stores from the
+ // front of StoreNodes here.
+
+ bool RV = false;
+ while (StoreNodes.size() > 1) {
+ size_t StartIdx = 0;
+ while ((StartIdx + 1 < StoreNodes.size()) &&
+ StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
+ StoreNodes[StartIdx + 1].OffsetFromBase)
+ ++StartIdx;
+
+ // Bail if we don't have enough candidates to merge.
+ if (StartIdx + 1 >= StoreNodes.size())
+ return RV;
+
+ if (StartIdx)
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
+
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive store memory address.
+ unsigned NumConsecutiveStores = 1;
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ NumConsecutiveStores = i + 1;
+ }
+
+ if (NumConsecutiveStores < 2) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumConsecutiveStores);
+ continue;
+ }
+
+ // The node with the lowest store address.
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+
+ // Store the constants into memory as one consecutive store.
+ if (IsConstantSrc) {
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned LastLegalType = 1;
+ unsigned LastLegalVectorType = 1;
+ bool LastIntegerTrunc = false;
+ bool NonZero = false;
+ unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = ST->getValue();
+ bool IsElementZero = false;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
+ IsElementZero = C->isNullValue();
+ else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
+ IsElementZero = C->getConstantFPValue()->isNullValue();
+ if (IsElementZero) {
+ if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
+ FirstZeroAfterNonZero = i;
+ }
+ NonZero |= !IsElementZero;
+
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast = false;
+
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = false;
+ LastLegalType = i + 1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(),
+ &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = true;
+ LastLegalType = i + 1;
+ }
+ }
+
+ // We only use vectors if the constant is known to be zero or the
+ // target allows it and the function is not marked with the
+ // noimplicitfloat attribute.
+ if ((!NonZero ||
+ TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(
+ Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast)
+ LastLegalVectorType = i + 1;
+ }
+ }
+
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+
+ // Check if we found a legal integer type that creates a meaningful
+ // merge.
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved or we've dropped a non-zero value. Drop as many
+ // candidates as we can here.
+ unsigned NumSkip = 1;
+ while (
+ (NumSkip < NumConsecutiveStores) &&
+ (NumSkip < FirstZeroAfterNonZero) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
+
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ continue;
+ }
+
+ RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
+ UseVector, LastIntegerTrunc);
+
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
+ continue;
+ }
+
+ // When extracting multiple vector elements, try to store them
+ // in one vector store rather than a sequence of scalar stores.
+ if (IsExtractVecSrc) {
+ // Loop on Consecutive Stores on success.
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned NumStoresToMerge = 1;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast;
+
+ // Break early when size is too large to be legal.
+ if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast)
+ NumStoresToMerge = i + 1;
+ }
+
+ // Check if we found a legal vector type creating a meaningful
+ // merge.
+ if (NumStoresToMerge < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved. Drop as many candidates as we can here.
+ unsigned NumSkip = 1;
+ while (
+ (NumSkip < NumConsecutiveStores) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
+
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(
+ StoreNodes, NumStoresToMerge, RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ NumConsecutiveStores -= NumStoresToMerge;
+ continue;
+ }
+
+ RV |= MergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumStoresToMerge, false, true, false);
+
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ NumConsecutiveStores -= NumStoresToMerge;
+ }
+ continue;
+ }
+
+ // Below we handle the case of multiple consecutive stores that
+ // come from multiple consecutive loads. We merge them into a single
+ // wide load and a single wide store.
+
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
+
+ // Find acceptable loads. Loads need to have the same chain (token factor),
+ // must not be zext, volatile, indexed, and they must be consecutive.
+ BaseIndexOffset LdBasePtr;
+
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = peekThroughBitcasts(St->getValue());
+ LoadSDNode *Ld = cast<LoadSDNode>(Val);
+
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
+ // If this is not the first ptr that we check.
+ int64_t LdOffset = 0;
+ if (LdBasePtr.getBase().getNode()) {
+ // The base ptr must be the same.
+ if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
+ break;
+ } else {
+ // Check that all other base pointers are the same as this one.
+ LdBasePtr = LdPtr;
+ }
+
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdOffset));
+ }
+
+ while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
+ // If we have load/store pair instructions and we only have two values,
+ // don't bother merging.
+ unsigned RequiredAlignment;
+ if (LoadNodes.size() == 2 &&
+ TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
+ break;
+ }
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
+
+      // Scan the memory operations on the chain and find the first
+      // non-consecutive load memory address. This variable holds the index
+      // into the load node array.
+
+ unsigned LastConsecutiveLoad = 1;
+
+      // These variables refer to a size rather than an index in the array.
+ unsigned LastLegalVectorType = 1;
+ unsigned LastLegalIntegerType = 1;
+ bool isDereferenceable = true;
+ bool DoIntegerTruncate = false;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue FirstChain = FirstLoad->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain)
+ break;
+
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
+
+ if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+ isDereferenceable = false;
+
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ bool IsFastSt, IsFastLd;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
+ IsFastLd) {
+ LastLegalVectorType = i + 1;
+ }
+
+ // Find a legal type for the integer store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
+ IsFastLd) {
+ LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = false;
+ // Or check whether a truncstore and extload is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
+ StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
+ StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(),
+ &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
+ IsFastLd) {
+ LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = true;
+ }
+ }
+ }
+
+ // Only use vector types if the vector type is larger than the integer
+ // type. If they are the same, use integers.
+ bool UseVectorTy =
+ LastLegalVectorType > LastLegalIntegerType && !NoVectors;
+ unsigned LastLegalType =
+ std::max(LastLegalVectorType, LastLegalIntegerType);
+
+ // We add +1 here because the LastXXX variables refer to location while
+ // the NumElem refers to array/index size.
+ unsigned NumElem =
+ std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
+ NumElem = std::min(LastLegalType, NumElem);
+
+ if (NumElem < 2) {
+        // We know that candidate stores are in order and of correct
+        // shape. While there is no mergeable sequence from the
+        // beginning, one may start later in the sequence. The only
+        // reason a merge of size N could have failed where another of
+        // the same size would not have is if the alignment of either
+        // the load or store has improved. Drop as many candidates as we
+        // can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < LoadNodes.size()) &&
+ (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
+
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ continue;
+ }
+
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ // Find a legal type for the vector store.
+ unsigned Elts = NumElem * NumMemElts;
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ } else {
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ }
+
+ SDLoc LoadDL(LoadNodes[0].MemNode);
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
+
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+ AddToWorklist(NewStoreChain.getNode());
+
+ MachineMemOperand::Flags LdMMOFlags =
+ isDereferenceable ? MachineMemOperand::MODereferenceable
+ : MachineMemOperand::MONone;
+ if (IsNonTemporalLoad)
+ LdMMOFlags |= MachineMemOperand::MONonTemporal;
+
+ MachineMemOperand::Flags StMMOFlags =
+ IsNonTemporalStore ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
+
+ SDValue NewLoad, NewStore;
+ if (UseVectorTy || !DoIntegerTruncate) {
+ NewLoad =
+ DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+ FirstLoadAlign, LdMMOFlags);
+ NewStore = DAG.getStore(
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
+ } else { // This must be the truncstore/extload case
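+        // Illustrative: if the merged integer type (say i32) must be
+        // promoted to i64 on this target, load the wide value with an
+        // extending load and store it back with a truncating store,
+        // keeping the i32 footprint in memory.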
+ EVT ExtendedTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+ NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
+ FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), JointMemOpVT,
+ FirstLoadAlign, LdMMOFlags);
+ NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ JointMemOpVT, FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
+
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ }
+
+      // Replace all of the stores with the new store. Recursively remove the
+      // corresponding value if it is no longer used.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ SDValue Val = StoreNodes[i].MemNode->getOperand(1);
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ if (Val.getNode()->use_empty())
+ recursivelyDeleteUnusedNodes(Val.getNode());
+ }
+
+ RV = true;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
+ }
+ return RV;
+}
+
+SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
+ SDLoc SL(ST);
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
+ ST->getMemOperand());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
+ MVT::Other, ST->getChain(), ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorklist(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(ST, Token, false);
+}
+
+SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
+ SDValue Value = ST->getValue();
+ if (Value.getOpcode() == ISD::TargetConstantFP)
+ return SDValue();
+
+ SDLoc DL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
+
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+
+ SDValue Tmp;
+ switch (CFP->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("Unknown FP type");
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
+ case MVT::f128:
+ case MVT::ppcf128:
+ return SDValue();
+ case MVT::f32:
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+ MVT::i32);
+ return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
+ }
+
+ return SDValue();
+ case MVT::f64:
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+ ST->isSimple()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), SDLoc(CFP), MVT::i64);
+ return DAG.getStore(Chain, DL, Tmp,
+ Ptr, ST->getMemOperand());
+ }
+
+ if (ST->isSimple() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
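+      // For example, storing f64 1.0 (bit pattern 0x3FF0000000000000) on a
+      // little-endian target becomes an i32 store of 0x00000000 at Ptr and
+      // an i32 store of 0x3FF00000 at Ptr+4.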
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
+ ST->getAlignment(), MMOFlags, AAInfo);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, DL, Ptr.getValueType()));
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(4),
+ Alignment, MMOFlags, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ St0, St1);
+ }
+
+ return SDValue();
+ }
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
+ ST->isUnindexed()) {
+ EVT SVT = Value.getOperand(0).getValueType();
+ // If the store is volatile, we only want to change the store type if the
+ // resulting store is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (((!LegalOperations && ST->isSimple()) ||
+ TLI.isOperationLegal(ISD::STORE, SVT)) &&
+ TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
+ DAG, *ST->getMemOperand())) {
+ return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getPointerInfo(), ST->getAlignment(),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ }
+ }
+
+ // Turn 'store undef, Ptr' -> nothing.
+ if (Value.isUndef() && ST->isUnindexed())
+ return Chain;
+
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
+ SDValue NewStore =
+ DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
+ ST->getMemoryVT(), Align,
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ // NewStore will always be N as we are only refining the alignment
+ assert(NewStore.getNode() == N);
+ (void)NewStore;
+ }
+ }
+ }
+
+ // Try transforming a pair floating point load / store ops to integer
+ // load / store ops.
+ if (SDValue NewST = TransformFPLoadStorePair(N))
+ return NewST;
+
+ // Try transforming several stores into STORE (BSWAP).
+ if (SDValue Store = MatchStoreCombine(ST))
+ return Store;
+
+ if (ST->isUnindexed()) {
+ // Walk up chain skipping non-aliasing memory nodes, on this store and any
+ // adjacent stores.
+ if (findBetterNeighborChains(ST)) {
+ // replaceStoreChain uses CombineTo, which handled all of the worklist
+ // manipulation. Return the original node to not do anything else.
+ return SDValue(ST, 0);
+ }
+ Chain = ST->getChain();
+ }
+
+ // FIXME: is there such a thing as a truncating indexed store?
+ if (ST->isTruncatingStore() && ST->isUnindexed() &&
+ Value.getValueType().isInteger() &&
+ (!isa<ConstantSDNode>(Value) ||
+ !cast<ConstantSDNode>(Value)->isOpaque())) {
+ APInt TruncDemandedBits =
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits());
+
+ // See if we can simplify the input to this truncstore with knowledge that
+ // only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ AddToWorklist(Value.getNode());
+ if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
+ ST->getMemOperand());
+
+ // Otherwise, see if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
+      // Re-visit the store if anything changed and the store hasn't been merged
+      // with another node (N is deleted). SimplifyDemandedBits will add Value's
+      // node back to the worklist if necessary, but we also need to re-visit
+      // the Store node itself.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
+
+ // If this is a load followed by a store to the same location, then the store
+ // is dead/noop.
+ // TODO: Can relax for unordered atomics (see D66309)
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+ ST->isUnindexed() && ST->isSimple() &&
+ // There can't be any side effects between the load and store, such as
+ // a call or store.
+ Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // TODO: Can relax for unordered atomics (see D66309)
+ if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
+ if (ST->isUnindexed() && ST->isSimple() &&
+ ST1->isUnindexed() && ST1->isSimple()) {
+ if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
+ ST->getMemoryVT() == ST1->getMemoryVT()) {
+ // If this is a store followed by a store with the same value to the
+ // same location, then the store is dead/noop.
+ return Chain;
+ }
+
+ if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+ !ST1->getBasePtr().isUndef()) {
+ const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
+ const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
+ unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
+ unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
+          // If the preceding store writes to a subset of the current store's
+          // location and no other node is chained to that store, we can
+          // effectively drop the preceding store. Do not remove stores to
+          // undef as they may be used as data sinks.
+ if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
+
+ // If ST stores to a subset of preceding store's write set, we may be
+ // able to fold ST's value into the preceding stored value. As we know
+ // the other uses of ST1's chain are unconcerned with ST, this folding
+ // will not affect those nodes.
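+          // e.g. (little-endian) a preceding i32 store of 0 followed by an
+          // i8 store of 0xAB at byte offset 1 folds into a single i32 store
+          // of 0x0000AB00.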
+ int64_t BitOffset;
+ if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
+ BitOffset)) {
+ SDValue ChainValue = ST1->getValue();
+ if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
+ APInt Val = C1->getAPIntValue();
+ APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
+ // FIXME: Handle Big-endian mode.
+ if (!DAG.getDataLayout().isBigEndian()) {
+ Val.insertBits(InsertVal, BitOffset);
+ SDValue NewSDVal =
+ DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
+ C1->isTargetOpcode(), C1->isOpaque());
+ SDNode *NewST1 = DAG.UpdateNodeOperands(
+ ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
+ ST1->getOperand(3));
+ return CombineTo(ST, SDValue(NewST1, 0));
+ }
+ }
+ }
+ } // End ST subset of ST1 case.
+ }
+ }
+ }
+
+ // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+ // truncating store. We can do this even if this is already a truncstore.
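+  // e.g. (store (i32 (truncate i64 X)), Ptr) --> (truncstore i64 X, Ptr, i32)
+  // when truncating i64 stores to i32 are legal on the target.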
+ if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+ && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT())) {
+ return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
+ Ptr, ST->getMemoryVT(), ST->getMemOperand());
+ }
+
+ // Always perform this optimization before types are legal. If the target
+ // prefers, also try this after legalization to catch stores that were created
+ // by intrinsics or other nodes.
+ if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
+ while (true) {
+ // There can be multiple store sequences on the same chain.
+ // Keep trying to merge store sequences until we are unable to do so
+ // or until we merge the last store on the chain.
+ bool Changed = MergeConsecutiveStores(ST);
+ if (!Changed) break;
+ // Return N as merge only uses CombineTo and no worklist clean
+ // up is necessary.
+ if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
+ return SDValue(N, 0);
+ }
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ //
+ // Make sure to do this only after attempting to merge stores in order to
+ // avoid changing the types of some subset of stores due to visit order,
+ // preventing their merging.
+ if (isa<ConstantFPSDNode>(ST->getValue())) {
+ if (SDValue NewSt = replaceStoreOfFPConstant(ST))
+ return NewSt;
+ }
+
+ if (SDValue NewSt = splitMergedValStore(ST))
+ return NewSt;
+
+ return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
+ const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
+ if (!LifetimeEnd->hasOffset())
+ return SDValue();
+
+ const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
+ LifetimeEnd->getOffset(), false);
+
+ // We walk up the chains to find stores.
+ SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+ if (!Chain.hasOneUse())
+ continue;
+ switch (Chain.getOpcode()) {
+ case ISD::TokenFactor:
+ for (unsigned Nops = Chain.getNumOperands(); Nops;)
+ Chains.push_back(Chain.getOperand(--Nops));
+ break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ // We can forward past any lifetime start/end that can be proven not to
+ // alias the node.
+ if (!isAlias(Chain.getNode(), N))
+ Chains.push_back(Chain.getOperand(0));
+ break;
+ case ISD::STORE: {
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
+ // TODO: Can relax for unordered atomics (see D66309)
+ if (!ST->isSimple() || ST->isIndexed())
+ continue;
+ const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
+ // If we store purely within object bounds just before its lifetime ends,
+ // we can remove the store.
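+      // e.g. an i32 store at offset 4 into an 8-byte stack object whose
+      // LIFETIME_END covers all 8 bytes is dead and can be deleted.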
+ if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
+ ST->getMemoryVT().getStoreSizeInBits())) {
+ LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
+ dbgs() << "\nwithin LIFETIME_END of : ";
+ LifetimeEndBase.dump(); dbgs() << "\n");
+ CombineTo(ST, ST->getChain());
+ return SDValue(N, 0);
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+/// For the store instruction sequence below, the F and I values
+/// are bundled together as an i64 value before being stored into memory.
+/// Sometimes it is more efficient to generate separate stores for F and I,
+/// which can remove the bitwise instructions or sink them to colder places.
+///
+/// (store (or (zext (bitcast F to i32) to i64),
+/// (shl (zext I to i64), 32)), addr) -->
+/// (store F, addr) and (store I, addr+4)
+///
+/// Similarly, splitting for other merged store can also be beneficial, like:
+/// For pair of {i32, i32}, i64 store --> two i32 stores.
+/// For pair of {i32, i16}, i64 store --> two i32 stores.
+/// For pair of {i16, i16}, i32 store --> two i16 stores.
+/// For pair of {i16, i8}, i32 store --> two i16 stores.
+/// For pair of {i8, i8}, i16 store --> two i8 stores.
+///
+/// We allow each target to determine specifically which kind of splitting is
+/// supported.
+///
+/// The store patterns are commonly seen in code like the simple snippet
+/// below when std::make_pair(...) is SROA-transformed before being inlined
+/// into hoo.
+/// void goo(const std::pair<int, float> &);
+/// hoo() {
+/// ...
+/// goo(std::make_pair(tmp, ftmp));
+/// ...
+/// }
+///
+SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
+ if (OptLevel == CodeGenOpt::None)
+ return SDValue();
+
+ // Can't change the number of memory accesses for a volatile store or break
+ // atomicity for an atomic one.
+ if (!ST->isSimple())
+ return SDValue();
+
+ SDValue Val = ST->getValue();
+ SDLoc DL(ST);
+
+ // Match OR operand.
+ if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
+ return SDValue();
+
+ // Match SHL operand and get Lower and Higher parts of Val.
+ SDValue Op1 = Val.getOperand(0);
+ SDValue Op2 = Val.getOperand(1);
+ SDValue Lo, Hi;
+ if (Op1.getOpcode() != ISD::SHL) {
+ std::swap(Op1, Op2);
+ if (Op1.getOpcode() != ISD::SHL)
+ return SDValue();
+ }
+ Lo = Op2;
+ Hi = Op1.getOperand(0);
+ if (!Op1.hasOneUse())
+ return SDValue();
+
+ // Match shift amount to HalfValBitSize.
+ unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
+ if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
+ return SDValue();
+
+  // Lo and Hi must be zero-extended from integers no wider than half the
+  // stored value's width (e.g. from i32 or narrower when storing an i64).
+ if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
+ !Lo.getOperand(0).getValueType().isScalarInteger() ||
+ Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
+ Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
+ !Hi.getOperand(0).getValueType().isScalarInteger() ||
+ Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
+ return SDValue();
+
+ // Use the EVT of low and high parts before bitcast as the input
+ // of target query.
+ EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
+ ? Lo.getOperand(0).getValueType()
+ : Lo.getValueType();
+ EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
+ ? Hi.getOperand(0).getValueType()
+ : Hi.getValueType();
+ if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
+ return SDValue();
+
+ // Start to split store.
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ // Change the sizes of Lo and Hi's value types to HalfValBitSize.
+ EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ // Lower value store.
+ SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
+ ST->getAlignment(), MMOFlags, AAInfo);
+ Ptr =
+ DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
+ // Higher value store.
+ SDValue St1 =
+ DAG.getStore(St0, DL, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
+ Alignment / 2, MMOFlags, AAInfo);
+ return St1;
+}
+
+/// Convert a disguised subvector insertion into a shuffle:
+SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+ SDValue InsertVal = N->getOperand(1);
+ SDValue Vec = N->getOperand(0);
+
+ // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
+ // --> (vector_shuffle X, Y)
+ if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
+ InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(InsertVal.getOperand(1))) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
+ ArrayRef<int> Mask = SVN->getMask();
+
+ SDValue X = Vec.getOperand(0);
+ SDValue Y = Vec.getOperand(1);
+
+ // Vec's operand 0 is using indices from 0 to N-1 and
+ // operand 1 from N to 2N - 1, where N is the number of
+ // elements in the vectors.
+ int XOffset = -1;
+ if (InsertVal.getOperand(0) == X) {
+ XOffset = 0;
+ } else if (InsertVal.getOperand(0) == Y) {
+ XOffset = X.getValueType().getVectorNumElements();
+ }
+
+ if (XOffset != -1) {
+ SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
+
+ auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
+ NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
+ assert(NewMask[InsIndex] <
+ (int)(2 * Vec.getValueType().getVectorNumElements()) &&
+             NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bounds");
+
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
+ Y, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
+ }
+ }
+
+ // insert_vector_elt V, (bitcast X from vector type), IdxC -->
+ // bitcast(shuffle (bitcast V), (extended X), Mask)
+ // Note: We do not use an insert_subvector node because that requires a
+ // legal subvector type.
+ if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
+ !InsertVal.getOperand(0).getValueType().isVector())
+ return SDValue();
+
+ SDValue SubVec = InsertVal.getOperand(0);
+ SDValue DestVec = N->getOperand(0);
+ EVT SubVecVT = SubVec.getValueType();
+ EVT VT = DestVec.getValueType();
+ unsigned NumSrcElts = SubVecVT.getVectorNumElements();
+ unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
+ unsigned NumMaskVals = ExtendRatio * NumSrcElts;
+
+ // Step 1: Create a shuffle mask that implements this insert operation. The
+ // vector that we are inserting into will be operand 0 of the shuffle, so
+ // those elements are just 'i'. The inserted subvector is in the first
+ // positions of operand 1 of the shuffle. Example:
+ // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
+ SmallVector<int, 16> Mask(NumMaskVals);
+ for (unsigned i = 0; i != NumMaskVals; ++i) {
+ if (i / NumSrcElts == InsIndex)
+ Mask[i] = (i % NumSrcElts) + NumMaskVals;
+ else
+ Mask[i] = i;
+ }
+
+  // Bail out if the target cannot handle the shuffle we want to create.
+ EVT SubVecEltVT = SubVecVT.getVectorElementType();
+ EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
+ if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
+ return SDValue();
+
+ // Step 2: Create a wide vector from the inserted source vector by appending
+ // undefined elements. This is the same size as our destination vector.
+ SDLoc DL(N);
+ SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
+ ConcatOps[0] = SubVec;
+ SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
+
+ // Step 3: Shuffle in the padded subvector.
+ SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
+ SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
+ AddToWorklist(PaddedSubV.getNode());
+ AddToWorklist(DestVecBC.getNode());
+ AddToWorklist(Shuf.getNode());
+ return DAG.getBitcast(VT, Shuf);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+ SDLoc DL(N);
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (InVal.isUndef())
+ return InVec;
+
+ EVT VT = InVec.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Remove redundant insertions:
+ // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
+ return InVec;
+
+ auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+ if (!IndexC) {
+    // If this is a variable insert into an undef vector, it might be better
+    // to splat:
+ // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
+ if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
+ SmallVector<SDValue, 8> Ops(NumElts, InVal);
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+ return SDValue();
+ }
+
+ // We must know which element is being inserted for folds below here.
+ unsigned Elt = IndexC->getZExtValue();
+ if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
+ return Shuf;
+
+ // Canonicalize insert_vector_elt dag nodes.
+ // Example:
+ // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
+ // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
+ //
+ // Do this only if the child insert_vector node has one use; also
+ // do this only if indices are both constants and Idx1 < Idx0.
+ if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
+ && isa<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned OtherElt = InVec.getConstantOperandVal(2);
+ if (Elt < OtherElt) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+ InVec.getOperand(0), InVal, EltNo);
+ AddToWorklist(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
+ VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
+ }
+ }
+
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
+ // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
+ // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
+ // vector elements.
+ SmallVector<SDValue, 8> Ops;
+ // Do not combine these two vectors if the output vector will not replace
+ // the input vector.
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
+ Ops.append(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ } else if (InVec.isUndef()) {
+ Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
+ } else {
+ return SDValue();
+ }
+ assert(Ops.size() == NumElts && "Unexpected vector size");
+
+ // Insert the element
+ if (Elt < Ops.size()) {
+ // All the operands of BUILD_VECTOR must have the same type;
+ // we enforce that here.
+ EVT OpVT = Ops[0].getValueType();
+ Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
+ }
+
+ // Return the new vector
+ return DAG.getBuildVector(VT, DL, Ops);
+}
+
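+/// Replace (extract_vector_elt (load $addr), Elt) with a narrow scalar load
+/// of just that element, e.g. element 2 of a loaded v4f32 becomes an f32
+/// load from $addr + 8, subject to the alignment and legality checks below.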
+SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
+ SDValue EltNo,
+ LoadSDNode *OriginalLoad) {
+ assert(OriginalLoad->isSimple());
+
+ EVT ResultVT = EVE->getValueType(0);
+ EVT VecEltVT = InVecVT.getVectorElementType();
+ unsigned Align = OriginalLoad->getAlignment();
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+ VecEltVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+ return SDValue();
+
+ ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
+ ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ return SDValue();
+
+ Align = NewAlign;
+
+ SDValue NewPtr = OriginalLoad->getBasePtr();
+ SDValue Offset;
+ EVT PtrType = NewPtr.getValueType();
+ MachinePointerInfo MPI;
+ SDLoc DL(EVE);
+ if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+ int Elt = ConstEltNo->getZExtValue();
+ unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+ Offset = DAG.getConstant(PtrOff, DL, PtrType);
+ MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ } else {
+ Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
+ Offset = DAG.getNode(
+ ISD::MUL, DL, PtrType, Offset,
+ DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
+ // Discard the pointer info except the address space because the memory
+ // operand can't represent this new access since the offset is variable.
+ MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
+ }
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
+
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+  // Note that this replacement assumes that the extractelement is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (ResultVT.bitsGT(VecEltVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
+ VecEltVT)
+ ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
+ OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
+ Align, OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
+ MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
+ Chain = Load.getValue(1);
+ if (ResultVT.bitsLT(VecEltVT))
+ Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+ else
+ Load = DAG.getBitcast(ResultVT, Load);
+ }
+ WorklistRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorklist(EVE);
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddUsersToWorklist(Load.getNode()); // Add users too
+ AddToWorklist(Load.getNode());
+ ++OpsNarrowed;
+ return SDValue(EVE, 0);
+}
+
+/// Transform a vector binary operation into a scalar binary operation by moving
+/// the math/logic after an extract element of a vector.
+static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
+ bool LegalOperations) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Vec = ExtElt->getOperand(0);
+ SDValue Index = ExtElt->getOperand(1);
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
+ Vec.getNode()->getNumValues() != 1)
+ return SDValue();
+
+ // Targets may want to avoid this to prevent an expensive register transfer.
+ if (!TLI.shouldScalarizeBinop(Vec))
+ return SDValue();
+
+ // Extracting an element of a vector constant is constant-folded, so this
+ // transform is just replacing a vector op with a scalar op while moving the
+ // extract.
+ SDValue Op0 = Vec.getOperand(0);
+ SDValue Op1 = Vec.getOperand(1);
+ if (isAnyConstantBuildVector(Op0, true) ||
+ isAnyConstantBuildVector(Op1, true)) {
+ // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
+ // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
+ SDLoc DL(ExtElt);
+ EVT VT = ExtElt->getValueType(0);
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
+ return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue VecOp = N->getOperand(0);
+ SDValue Index = N->getOperand(1);
+ EVT ScalarVT = N->getValueType(0);
+ EVT VecVT = VecOp.getValueType();
+ if (VecOp.isUndef())
+ return DAG.getUNDEF(ScalarVT);
+
+ // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
+ //
+ // This only really matters if the index is non-constant since other combines
+ // on the constant elements already work.
+ SDLoc DL(N);
+ if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ Index == VecOp.getOperand(2)) {
+ SDValue Elt = VecOp.getOperand(1);
+ return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
+ }
+
+ // (vextract (scalar_to_vector val, 0) -> val
+ if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+ // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ SDValue InOp = VecOp.getOperand(0);
+ if (InOp.getValueType() != ScalarVT) {
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
+ }
+ return InOp;
+ }
+
+ // extract_vector_elt of out-of-bounds element -> UNDEF
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (IndexC && IndexC->getAPIntValue().uge(NumElts))
+ return DAG.getUNDEF(ScalarVT);
+
+ // extract_vector_elt (build_vector x, y), 1 -> y
+ if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
+ TLI.isTypeLegal(VecVT) &&
+ (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
+ SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
+ EVT InEltVT = Elt.getValueType();
+
+    // Sometimes a build_vector's scalar input types do not match the result
+    // type.
+ if (ScalarVT == InEltVT)
+ return Elt;
+
+ // TODO: It may be useful to truncate if free if the build_vector implicitly
+ // converts.
+ }
+
+ // TODO: These transforms should not require the 'hasOneUse' restriction, but
+ // there are regressions on multiple targets without it. We can end up with a
+ // mess of scalar and vector code if we reduce only part of the DAG to scalar.
+ if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
+ VecOp.hasOneUse()) {
+ // The vector index of the LSBs of the source depend on the endian-ness.
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ unsigned ExtractIndex = IndexC->getZExtValue();
+ // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
+ unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
+ SDValue BCSrc = VecOp.getOperand(0);
+ if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
+ return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
+
+ if (LegalTypes && BCSrc.getValueType().isInteger() &&
+ BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
+ // trunc i64 X to i32
+ SDValue X = BCSrc.getOperand(0);
+ assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
+ "Extract element and scalar to vector can't change element type "
+ "from FP to integer.");
+ unsigned XBitWidth = X.getValueSizeInBits();
+ unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+ BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
+
+ // An extract element return value type can be wider than its vector
+ // operand element type. In that case, the high bits are undefined, so
+ // it's possible that we may need to extend rather than truncate.
+ if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
+ assert(XBitWidth % VecEltBitWidth == 0 &&
+ "Scalar bitwidth must be a multiple of vector element bitwidth");
+ return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
+ }
+ }
+ }
+
+ if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+ return BO;
+
+ // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because
+ // we may introduce new vector instructions which are not backed by TD
+ // patterns. For example on AVX, extracting elements from a wide vector
+ // without using extract_subvector. However, if we can find an underlying
+ // scalar value, then we can always use that.
+ if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
+ // Find the new index to extract from.
+ int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
+
+ // Extracting an undef index is undef.
+ if (OrigElt == -1)
+ return DAG.getUNDEF(ScalarVT);
+
+ // Select the right vector half to extract from.
+ SDValue SVInVec;
+ if (OrigElt < (int)NumElts) {
+ SVInVec = VecOp.getOperand(0);
+ } else {
+ SVInVec = VecOp.getOperand(1);
+ OrigElt -= NumElts;
+ }
+
+ if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue InOp = SVInVec.getOperand(OrigElt);
+ if (InOp.getValueType() != ScalarVT) {
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
+ }
+
+ return InOp;
+ }
+
+ // FIXME: We should handle recursing on other vector shuffles and
+ // scalar_to_vector here as well.
+
+ if (!LegalOperations ||
+ // FIXME: Should really be just isOperationLegalOrCustom.
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
+ TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
+ EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
+ DAG.getConstant(OrigElt, DL, IndexTy));
+ }
+ }
+
+ // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
+ // simplify it based on the (valid) extraction indices.
+ if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
+ return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Use->getOperand(0) == VecOp &&
+ isa<ConstantSDNode>(Use->getOperand(1));
+ })) {
+ APInt DemandedElts = APInt::getNullValue(NumElts);
+ for (SDNode *Use : VecOp->uses()) {
+ auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
+ if (CstElt->getAPIntValue().ult(NumElts))
+ DemandedElts.setBit(CstElt->getZExtValue());
+ }
+ if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
+ // We simplified the vector operand of this extract element. If this
+ // extract is not dead, visit it again so it is folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
+
+ // Everything under here is trying to match an extract of a loaded value.
+  // If the result of the load has to be truncated, then it's not necessarily
+ // profitable.
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VecVT.getVectorElementType();
+ EVT LVT = ExtVT;
+ if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
+ return SDValue();
+
+ if (VecOp.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ EVT BCVT = VecOp.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (NumElts != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ VecOp = VecOp.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ }
+
+ // extract (vector load $addr), i --> load $addr + i * size
+ if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
+ ISD::isNormalLoad(VecOp.getNode()) &&
+ !Index->hasPredecessor(VecOp.getNode())) {
+ auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
+ if (VecLoad && VecLoad->isSimple())
+ return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
+ }
+
+ // Perform only after legalization to ensure build_vector / vector_shuffle
+ // optimizations have already been done.
+ if (!LegalOperations || !IndexC)
+ return SDValue();
+
+ // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+ int Elt = IndexC->getZExtValue();
+ LoadSDNode *LN0 = nullptr;
+ if (ISD::isNormalLoad(VecOp.getNode())) {
+ LN0 = cast<LoadSDNode>(VecOp);
+ } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ VecOp.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
+ }
+ if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
+
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
+
+ // Select the input vector, guarding against out of range extract vector.
+ int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
+ VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
+
+ if (VecOp.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ VecOp = VecOp.getOperand(0);
+ }
+ if (ISD::isNormalLoad(VecOp.getNode())) {
+ LN0 = cast<LoadSDNode>(VecOp);
+ Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
+ Index = DAG.getConstant(Elt, DL, Index.getValueType());
+ }
+ }
+
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
+ return SDValue();
+
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LVT);
+
+ return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
+}
+
+// Simplify (build_vec (ext )) to (bitcast (build_vec ))
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization before may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return SDValue();
+
+ unsigned NumInScalars = N->getNumOperands();
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of values
+ // which come from any_extend or zero_extend nodes. If so, we can create
+ // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+ // optimizations. We do not handle sign-extend because we can't fill the sign
+ // using shuffles.
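+  // e.g. on a little-endian target:
+  //   (v4i32 build_vector (zext i16 a), (zext i16 b), ...)
+  //     --> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, ...))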
+ EVT SourceType = MVT::Other;
+ bool AllAnyExt = true;
+
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ // Ignore undef inputs.
+ if (In.isUndef()) continue;
+
+ bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+ bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+ // Abort if the element is not an extension.
+ if (!ZeroExt && !AnyExt) {
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // The input is a ZeroExt or AnyExt. Check the original type.
+ EVT InTy = In.getOperand(0).getValueType();
+
+ // Check that all of the widened source types are the same.
+ if (SourceType == MVT::Other)
+ // First time.
+ SourceType = InTy;
+ else if (InTy != SourceType) {
+      // Multiple incoming types. Abort.
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // Check if all of the extends are ANY_EXTENDs.
+ AllAnyExt &= AnyExt;
+ }
+
+ // In order to have valid types, all of the inputs must be extended from the
+ // same source type and all of the inputs must be any or zero extend.
+ // Scalar sizes must be a power of two.
+ EVT OutScalarTy = VT.getScalarType();
+ bool ValidTypes = SourceType != MVT::Other &&
+ isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
+ isPowerOf2_32(SourceType.getSizeInBits());
+
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+ // turn into a single shuffle instruction.
+ if (!ValidTypes)
+ return SDValue();
+
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, DL, SourceType);
+
+ unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.isUndef()) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.isUndef())
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT) ||
+ (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
+ return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorklist(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getBitcast(VT, BV);
+}
+
+SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
+ ArrayRef<int> VectorMask,
+ SDValue VecIn1, SDValue VecIn2,
+ unsigned LeftIdx, bool DidSplitVec) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
+
+ EVT VT = N->getValueType(0);
+ EVT InVT1 = VecIn1.getValueType();
+ EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned ShuffleNumElems = NumElems;
+
+ // If we artificially split a vector in two already, then the offsets in the
+ // operands will all be based off of VecIn1, even those in VecIn2.
+ unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
+
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Try to make the types match the type of the output.
+ if (InVT1 != VT || InVT2 != VT) {
+ if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
+ // If the output vector length is a multiple of both input lengths,
+ // we can concatenate them and pad the rest with undefs.
+ unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
+ assert(NumConcats >= 2 && "Concat needs at least two inputs!");
+ SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
+ ConcatOps[0] = VecIn1;
+ ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ VecIn2 = SDValue();
+ } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
+ if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
+ return SDValue();
+
+ if (!VecIn2.getNode()) {
+ // If we only have one input vector, and it's twice the size of the
+ // output, split it in two.
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
+ DAG.getConstant(NumElems, DL, IdxTy));
+ VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
+ // Since we now have shorter input vectors, adjust the offset of the
+ // second vector's start.
+ Vec2Offset = NumElems;
+ } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
+ // VecIn1 is wider than the output, and we have another, possibly
+ // smaller input. Pad the smaller input with undefs, shuffle at the
+ // input vector width, and extract the output.
+ // The shuffle type is different than VT, so check legality again.
+ if (LegalOperations &&
+ !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
+ return SDValue();
+
+ // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
+ // lower it back into a BUILD_VECTOR. So if the inserted type is
+ // illegal, don't even try.
+ if (InVT1 != InVT2) {
+ if (!TLI.isTypeLegal(InVT2))
+ return SDValue();
+ VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
+ DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+ }
+ ShuffleNumElems = NumElems * 2;
+ } else {
+ // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
+ // than VecIn1. We can't handle this for now - this case will disappear
+ // when we start sorting the vectors by type.
+ return SDValue();
+ }
+ } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
+ InVT1.getSizeInBits() == VT.getSizeInBits()) {
+ SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
+ ConcatOps[0] = VecIn2;
+ VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ } else {
+ // TODO: Support cases where the length mismatch isn't exactly by a
+ // factor of 2.
+ // TODO: Move this check upwards, so that if we have bad type
+ // mismatches, we don't create any DAG nodes.
+ return SDValue();
+ }
+ }
+
+ // Initialize mask to undef.
+ SmallVector<int, 8> Mask(ShuffleNumElems, -1);
+
+ // Only need to run up to the number of elements actually used, not the
+ // total number of elements in the shuffle - if we are shuffling a wider
+ // vector, the high lanes should be set to undef.
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (VectorMask[i] <= 0)
+ continue;
+
+ unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
+ if (VectorMask[i] == (int)LeftIdx) {
+ Mask[i] = ExtIndex;
+ } else if (VectorMask[i] == (int)LeftIdx + 1) {
+ Mask[i] = Vec2Offset + ExtIndex;
+ }
+ }
+
+ // The type the input vectors may have changed above.
+ InVT1 = VecIn1.getValueType();
+
+ // If we already have a VecIn2, it should have the same type as VecIn1.
+ // If we don't, get an undef/zero vector of the appropriate type.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
+ assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
+
+ SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
+ if (ShuffleNumElems > NumElems)
+ Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
+
+ return Shuffle;
+}
+
+static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
+ assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+
+ // First, determine where the build vector is not undef.
+ // TODO: We could extend this to handle zero elements as well as undefs.
+ int NumBVOps = BV->getNumOperands();
+ int ZextElt = -1;
+ for (int i = 0; i != NumBVOps; ++i) {
+ SDValue Op = BV->getOperand(i);
+ if (Op.isUndef())
+ continue;
+ if (ZextElt == -1)
+ ZextElt = i;
+ else
+ return SDValue();
+ }
+ // Bail out if there's no non-undef element.
+ if (ZextElt == -1)
+ return SDValue();
+
+ // The build vector contains some number of undef elements and exactly
+ // one other element. That other element must be a zero-extended scalar
+ // extracted from a vector at a constant index to turn this into a shuffle.
+ // Also, require that the build vector does not implicitly truncate/extend
+ // its elements.
+ // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+ EVT VT = BV->getValueType(0);
+ SDValue Zext = BV->getOperand(ZextElt);
+ if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
+ Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+ Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
+ return SDValue();
+
+ // The zero-extend must be a multiple of the source size, and we must be
+ // building a vector of the same size as the source of the extract element.
+ SDValue Extract = Zext.getOperand(0);
+ unsigned DestSize = Zext.getValueSizeInBits();
+ unsigned SrcSize = Extract.getValueSizeInBits();
+ if (DestSize % SrcSize != 0 ||
+ Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // Create a shuffle mask that will combine the extracted element with zeros
+ // and undefs.
+ int ZextRatio = DestSize / SrcSize;
+ int NumMaskElts = NumBVOps * ZextRatio;
+ SmallVector<int, 32> ShufMask(NumMaskElts, -1);
+ for (int i = 0; i != NumMaskElts; ++i) {
+ if (i / ZextRatio == ZextElt) {
+ // The low bits of the (potentially translated) extracted element map to
+ // the source vector. The high bits map to zero. We will use a zero vector
+ // as the 2nd source operand of the shuffle, so use the 1st element of
+ // that vector (mask value is number-of-elements) for the high bits.
+ if (i % ZextRatio == 0)
+ ShufMask[i] = Extract.getConstantOperandVal(1);
+ else
+ ShufMask[i] = NumMaskElts;
+ }
+
+ // Undef elements of the build vector remain undef because we initialize
+ // the shuffle mask with -1.
+ }
+
+ // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
+ // bitcast (shuffle V, ZeroVec, VectorMask)
+ SDLoc DL(BV);
+ EVT VecVT = Extract.getOperand(0).getValueType();
+ SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
+ ZeroVec, ShufMask, DAG);
+ if (!Shuf)
+ return SDValue();
+ return DAG.getBitcast(VT, Shuf);
+}
+
+// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+// operations. If the types of the vectors we're extracting from allow it,
+// turn this into a vector_shuffle node.
+SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
+ return V;
+
+ // May only combine to shuffle after legalize if shuffle is legal.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
+ return SDValue();
+
+ bool UsesZeroVector = false;
+ unsigned NumElems = N->getNumOperands();
+
+ // Record, for each element of the newly built vector, which input vector
+ // that element comes from. -1 stands for undef, 0 for the zero vector,
+ // and positive values for the input vectors.
+ // VectorMask maps each element to its vector number, and VecIn maps vector
+ // numbers to their initial SDValues.
+
+ SmallVector<int, 8> VectorMask(NumElems, -1);
+ SmallVector<SDValue, 8> VecIn;
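+  // Slot 0 of VecIn is reserved as a placeholder for the zero vector.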
+ VecIn.push_back(SDValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if (Op.isUndef())
+ continue;
+
+ // See if we can use a blend with a zero vector.
+ // TODO: Should we generalize this to a blend with an arbitrary constant
+ // vector?
+ if (isNullConstant(Op) || isNullFPConstant(Op)) {
+ UsesZeroVector = true;
+ VectorMask[i] = 0;
+ continue;
+ }
+
+ // Not an undef or zero. If the input is something other than an
+ // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
+ if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ SDValue ExtractedFromVec = Op.getOperand(0);
+
+ const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
+ if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
+ return SDValue();
+
+ // All inputs must have the same element type as the output.
+ if (VT.getVectorElementType() !=
+ ExtractedFromVec.getValueType().getVectorElementType())
+ return SDValue();
+
+ // Have we seen this input vector before?
+ // The vectors are expected to be tiny (usually 1 or 2 elements), so using
+ // a map back from SDValues to numbers isn't worth it.
+ unsigned Idx = std::distance(
+ VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
+ if (Idx == VecIn.size())
+ VecIn.push_back(ExtractedFromVec);
+
+ VectorMask[i] = Idx;
+ }
+
+ // If we didn't find at least one input vector, bail out.
+ if (VecIn.size() < 2)
+ return SDValue();
+
+  // If all the operands of the BUILD_VECTOR extract from the same vector,
+  // then split that vector efficiently based on the maximum vector access
+  // index, and adjust VectorMask and VecIn accordingly.
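+  // For example (illustrative, assuming the v8i32 split type is legal):
+  //   a v4i32 build_vector of extracts from (v16i32 V) at indices
+  //   0, 1, 14, 15 has MaxIndex = 15, so NearestPow2 = 16 and SplitSize = 8;
+  //   V is split into its low and high halves, and the mask entries are
+  //   remapped to input vectors 1 and 2 respectively.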
+ bool DidSplitVec = false;
+ if (VecIn.size() == 2) {
+ unsigned MaxIndex = 0;
+ unsigned NearestPow2 = 0;
+ SDValue Vec = VecIn.back();
+ EVT InVT = Vec.getValueType();
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SmallVector<unsigned, 8> IndexVec(NumElems, 0);
+
+ for (unsigned i = 0; i < NumElems; i++) {
+ if (VectorMask[i] <= 0)
+ continue;
+ unsigned Index = N->getOperand(i).getConstantOperandVal(1);
+ IndexVec[i] = Index;
+ MaxIndex = std::max(MaxIndex, Index);
+ }
+
+ NearestPow2 = PowerOf2Ceil(MaxIndex);
+ if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
+ NumElems * 2 < NearestPow2) {
+ unsigned SplitSize = NearestPow2 / 2;
+ EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), SplitSize);
+ if (TLI.isTypeLegal(SplitVT)) {
+ SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+ DAG.getConstant(SplitSize, DL, IdxTy));
+ SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+ DAG.getConstant(0, DL, IdxTy));
+ VecIn.pop_back();
+ VecIn.push_back(VecIn1);
+ VecIn.push_back(VecIn2);
+ DidSplitVec = true;
+
+ for (unsigned i = 0; i < NumElems; i++) {
+ if (VectorMask[i] <= 0)
+ continue;
+ VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
+ }
+ }
+ }
+ }
+
+ // TODO: We want to sort the vectors by descending length, so that adjacent
+ // pairs have similar length, and the longer vector is always first in the
+ // pair.
+
+  // TODO: Should this fire if some of the input vectors have illegal types
+  // (like it does now), or should we let legalization run its course first?
+
+ // Shuffle phase:
+ // Take pairs of vectors, and shuffle them so that the result has elements
+ // from these vectors in the correct places.
+ // For example, given:
+ // t10: i32 = extract_vector_elt t1, Constant:i64<0>
+ // t11: i32 = extract_vector_elt t2, Constant:i64<0>
+ // t12: i32 = extract_vector_elt t3, Constant:i64<0>
+ // t13: i32 = extract_vector_elt t1, Constant:i64<1>
+ // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
+ // We will generate:
+ // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
+ // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
+ SmallVector<SDValue, 4> Shuffles;
+ for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
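+    // VecIn[0] is the zero-vector placeholder, so the real input vectors
+    // start at index 1; each iteration pairs up inputs 2*In+1 and 2*In+2.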
+ unsigned LeftIdx = 2 * In + 1;
+ SDValue VecLeft = VecIn[LeftIdx];
+ SDValue VecRight =
+ (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
+
+ if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
+ VecRight, LeftIdx, DidSplitVec))
+ Shuffles.push_back(Shuffle);
+ else
+ return SDValue();
+ }
+
+ // If we need the zero vector as an "ingredient" in the blend tree, add it
+ // to the list of shuffles.
+ if (UsesZeroVector)
+ Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
+ : DAG.getConstantFP(0.0, DL, VT));
+
+ // If we only have one shuffle, we're done.
+ if (Shuffles.size() == 1)
+ return Shuffles[0];
+
+ // Update the vector mask to point to the post-shuffle vectors.
+ for (int &Vec : VectorMask)
+ if (Vec == 0)
+ Vec = Shuffles.size() - 1;
+ else
+ Vec = (Vec - 1) / 2;
+
+ // More than one shuffle. Generate a binary tree of blends, e.g. if from
+ // the previous step we got the set of shuffles t10, t11, t12, t13, we will
+ // generate:
+ // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
+ // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
+ // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
+ // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
+ // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
+ // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
+ // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
+
+ // Make sure the initial size of the shuffle list is even.
+ if (Shuffles.size() % 2)
+ Shuffles.push_back(DAG.getUNDEF(VT));
+
+ for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
+ if (CurSize % 2) {
+ Shuffles[CurSize] = DAG.getUNDEF(VT);
+ CurSize++;
+ }
+ for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
+ int Left = 2 * In;
+ int Right = 2 * In + 1;
+ SmallVector<int, 8> Mask(NumElems, -1);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (VectorMask[i] == Left) {
+ Mask[i] = i;
+ VectorMask[i] = In;
+ } else if (VectorMask[i] == Right) {
+ Mask[i] = i + NumElems;
+ VectorMask[i] = In;
+ }
+ }
+
+ Shuffles[In] =
+ DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
+ }
+ }
+ return Shuffles[0];
+}
+
+// Try to turn a build vector of zero extends of extract vector elts into a
+// vector zero extend and possibly an extract subvector.
+// TODO: Support sign extend?
+// TODO: Allow undef elements?
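+// For example:
+//   v4i32 build_vector (zext (extractelt (v8i16 X), 4)), ...,
+//                      (zext (extractelt (v8i16 X), 7))
+//     --> v4i32 zero_extend (v4i16 extract_subvector X, 4)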
+SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
+ if (LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ bool FoundZeroExtend = false;
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> int64_t {
+ unsigned Opc = Op.getOpcode();
+ FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
+ if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
+ Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
+ if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
+ return C->getZExtValue();
+ return -1;
+ };
+
+ // Make sure the first element matches
+ // (zext (extract_vector_elt X, C))
+ int64_t Offset = checkElem(Op0);
+ if (Offset < 0)
+ return SDValue();
+
+ unsigned NumElems = N->getNumOperands();
+ SDValue In = Op0.getOperand(0).getOperand(0);
+ EVT InSVT = In.getValueType().getScalarType();
+ EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
+
+ // Don't create an illegal input type after type legalization.
+ if (LegalTypes && !TLI.isTypeLegal(InVT))
+ return SDValue();
+
+ // Ensure all the elements come from the same vector and are adjacent.
+ for (unsigned i = 1; i != NumElems; ++i) {
+ if ((Offset + i) != checkElem(N->getOperand(i)))
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
+ Op0.getOperand(0).getOperand(1));
+ return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
+ VT, In);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ // If this is a splat of a bitcast from another vector, change to a
+ // concat_vector.
+ // For example:
+ // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
+ // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
+ //
+ // If X is a build_vector itself, the concat can become a larger build_vector.
+ // TODO: Maybe this is useful for non-splat too?
+ if (!LegalOperations) {
+ if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ Splat = peekThroughBitcasts(Splat);
+ EVT SrcVT = Splat.getValueType();
+ if (SrcVT.isVector()) {
+ unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getVectorElementType(), NumElts);
+ if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
+ NewVT, Ops);
+ return DAG.getBitcast(VT, Concat);
+ }
+ }
+ }
+ }
+
+ // A splat of a single element is a SPLAT_VECTOR if supported on the target.
+ if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
+ if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ assert(!V.isUndef() && "Splat of undef should have been handled earlier");
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
+ }
+
+  // Check if we can express the BUILD_VECTOR as a subvector extract.
+ if (!LegalTypes && (N->getNumOperands() > 1)) {
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> uint64_t {
+ if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
+ (Op0.getOperand(0) == Op.getOperand(0)))
+ if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return CNode->getZExtValue();
+ return -1;
+ };
+
+ int Offset = checkElem(Op0);
+ for (unsigned i = 0; i < N->getNumOperands(); ++i) {
+ if (Offset + i != checkElem(N->getOperand(i))) {
+ Offset = -1;
+ break;
+ }
+ }
+
+ if ((Offset == 0) &&
+ (Op0.getOperand(0).getValueType() == N->getValueType(0)))
+ return Op0.getOperand(0);
+ if ((Offset != -1) &&
+ ((Offset % N->getValueType(0).getVectorNumElements()) ==
+ 0)) // IDX must be multiple of output size.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+
+ if (SDValue V = convertBuildVecZextToZext(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecToShuffle(N))
+ return V;
+
+ return SDValue();
+}
+
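+// Fold a CONCAT_VECTORS whose operands are bitcasts of scalars (or undef)
+// into one BUILD_VECTOR of those scalars plus a single bitcast. For example,
+// assuming v2i32 is not a legal type for the target:
+//   v4i32 concat_vectors (v2i32 bitcast (i64 A)), (v2i32 bitcast (i64 B))
+//     --> v4i32 bitcast (v2i64 build_vector A, B)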
+static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OpVT = N->getOperand(0).getValueType();
+
+ // If the operands are legal vectors, leave them alone.
+ if (TLI.isTypeLegal(OpVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 8> Ops;
+
+ EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+
+ // Keep track of what we encounter.
+ bool AnyInteger = false;
+ bool AnyFP = false;
+ for (const SDValue &Op : N->ops()) {
+ if (ISD::BITCAST == Op.getOpcode() &&
+ !Op.getOperand(0).getValueType().isVector())
+ Ops.push_back(Op.getOperand(0));
+ else if (ISD::UNDEF == Op.getOpcode())
+ Ops.push_back(ScalarUndef);
+ else
+ return SDValue();
+
+ // Note whether we encounter an integer or floating point scalar.
+    // If it's neither, bail out; it could be something weird like x86mmx.
+ EVT LastOpVT = Ops.back().getValueType();
+ if (LastOpVT.isFloatingPoint())
+ AnyFP = true;
+ else if (LastOpVT.isInteger())
+ AnyInteger = true;
+ else
+ return SDValue();
+ }
+
+ // If any of the operands is a floating point scalar bitcast to a vector,
+ // use floating point types throughout, and bitcast everything.
+ // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
+ if (AnyFP) {
+ SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
+ ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+ if (AnyInteger) {
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.isUndef())
+ Op = ScalarUndef;
+ else
+ Op = DAG.getBitcast(SVT, Op);
+ }
+ }
+ }
+
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getSizeInBits() / SVT.getSizeInBits());
+ return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
+}
+
+// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
+// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
+// most two distinct vectors the same size as the result, attempt to turn this
+// into a legal shuffle.
+static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ int NumElts = VT.getVectorNumElements();
+ int NumOpElts = OpVT.getVectorNumElements();
+
+ SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
+ SmallVector<int, 8> Mask;
+
+ for (SDValue Op : N->ops()) {
+ Op = peekThroughBitcasts(Op);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (Op.isUndef()) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // What vector are we extracting the subvector from and at what index?
+ SDValue ExtVec = Op.getOperand(0);
+
+ // We want the EVT of the original extraction to correctly scale the
+ // extraction index.
+ EVT ExtVT = ExtVec.getValueType();
+ ExtVec = peekThroughBitcasts(ExtVec);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (ExtVec.isUndef()) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ int ExtIdx = Op.getConstantOperandVal(1);
+
+ // Ensure that we are extracting a subvector from a vector the same
+ // size as the result.
+ if (ExtVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // Scale the subvector index to account for any bitcast.
+ int NumExtElts = ExtVT.getVectorNumElements();
+ if (0 == (NumExtElts % NumElts))
+ ExtIdx /= (NumExtElts / NumElts);
+ else if (0 == (NumElts % NumExtElts))
+ ExtIdx *= (NumElts / NumExtElts);
+ else
+ return SDValue();
+
+ // At most we can reference 2 inputs in the final shuffle.
+ if (SV0.isUndef() || SV0 == ExtVec) {
+ SV0 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx);
+ } else if (SV1.isUndef() || SV1 == ExtVec) {
+ SV1 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx + NumElts);
+ } else {
+ return SDValue();
+ }
+ }
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
+ DAG.getBitcast(VT, SV1), Mask, DAG);
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1)
+ return N->getOperand(0);
+
+ // Check if all of the operands are undefs.
+ EVT VT = N->getValueType(0);
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ // Optimize concat_vectors where all but the first of the vectors are undef.
+ if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
+ return Op.isUndef();
+ })) {
+ SDValue In = N->getOperand(0);
+ assert(In.getValueType().isVector() && "Must concat vectors");
+
+ // If the input is a concat_vectors, just make a larger concat by padding
+ // with smaller undefs.
+ if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
+ unsigned NumOps = N->getNumOperands() * In.getNumOperands();
+ SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
+ Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+ }
+
+ SDValue Scalar = peekThroughOneUseBitcasts(In);
+
+ // concat_vectors(scalar_to_vector(scalar), undef) ->
+ // scalar_to_vector(scalar)
+ if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Scalar.hasOneUse()) {
+ EVT SVT = Scalar.getValueType().getVectorElementType();
+ if (SVT == Scalar.getOperand(0).getValueType())
+ Scalar = Scalar.getOperand(0);
+ }
+
+ // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
+ if (!Scalar.getValueType().isVector()) {
+ // If the bitcast type isn't legal, it might be a trunc of a legal type;
+ // look through the trunc so we can still do the transform:
+ // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
+ if (Scalar->getOpcode() == ISD::TRUNCATE &&
+ !TLI.isTypeLegal(Scalar.getValueType()) &&
+ TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
+ Scalar = Scalar->getOperand(0);
+
+ EVT SclTy = Scalar.getValueType();
+
+ if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
+ return SDValue();
+
+ // Bail out if the vector size is not a multiple of the scalar size.
+ if (VT.getSizeInBits() % SclTy.getSizeInBits())
+ return SDValue();
+
+ unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
+ if (VNTNumElms < 2)
+ return SDValue();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
+ if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
+ return SDValue();
+
+ SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF only concatenation.
+ // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
+ // -> (BUILD_VECTOR A, B, ..., C, D, ...)
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
+ SmallVector<SDValue, 8> Opnds;
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint()) {
+      // If the BUILD_VECTORs are built from integers, they may have different
+      // operand types. Get the smallest type and truncate all operands to it.
+ bool FoundMinVT = false;
+ for (const SDValue &Op : N->ops())
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ EVT OpSVT = Op.getOperand(0).getValueType();
+ MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+ FoundMinVT = true;
+ }
+ assert(FoundMinVT && "Concat vector type mismatch");
+ }
+
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ Opnds.append(NumElts, DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
+ }
+
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
+ return DAG.getBuildVector(VT, SDLoc(N), Opnds);
+ }
+
+ // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+ return V;
+
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
+ return V;
+
+ // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
+ // nodes often generate nop CONCAT_VECTOR nodes.
+  // Scan the CONCAT_VECTORS operands and look for CONCAT operations that
+  // place the incoming vectors at the exact same location.
+ SDValue SingleSource = SDValue();
+ unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if (Op.isUndef())
+ continue;
+
+ // Check if this is the identity extract:
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // Find the single incoming vector for the extract_subvector.
+ if (SingleSource.getNode()) {
+ if (Op.getOperand(0) != SingleSource)
+ return SDValue();
+ } else {
+ SingleSource = Op.getOperand(0);
+
+      // Check that the source type is the same as the type of the result.
+      // If not, this concat may extend the vector, so we cannot optimize it
+      // away.
+ if (SingleSource.getValueType() != N->getValueType(0))
+ return SDValue();
+ }
+
+ auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ // The extract index must be constant.
+ if (!CS)
+ return SDValue();
+
+ // Check that we are reading from the identity index.
+ unsigned IdentityIndex = i * PartNumElem;
+ if (CS->getAPIntValue() != IdentityIndex)
+ return SDValue();
+ }
+
+ if (SingleSource.getNode())
+ return SingleSource;
+
+ return SDValue();
+}
+
+// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
+// if the subvector can be sourced for free.
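+// For example, with SubVT = v4i32 and Index = 4:
+//   concat_vectors (v4i32 A), (v4i32 B)  returns B, and
+//   insert_subvector W, (v4i32 X), 4     returns X.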
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
+ return V.getOperand(1);
+ }
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
+ V.getOperand(0).getValueType() == SubVT &&
+ (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
+ return V.getOperand(SubIdx);
+ }
+ return SDValue();
+}
+
+static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue BinOp = Extract->getOperand(0);
+ unsigned BinOpcode = BinOp.getOpcode();
+ if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
+ return SDValue();
+
+ EVT VecVT = BinOp.getValueType();
+ SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
+ if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
+ return SDValue();
+
+ SDValue Index = Extract->getOperand(1);
+ EVT SubVT = Extract->getValueType(0);
+ if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
+ return SDValue();
+
+ SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
+ SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
+
+ // TODO: We could handle the case where only 1 operand is being inserted by
+ // creating an extract of the other operand, but that requires checking
+ // number of uses and/or costs.
+ if (!Sub0 || !Sub1)
+ return SDValue();
+
+ // We are inserting both operands of the wide binop only to extract back
+ // to the narrow vector size. Eliminate all of the insert/extract:
+ // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
+ return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
+ BinOp->getFlags());
+}
+
+/// If we are extracting a subvector produced by a wide binary operator try
+/// to use a narrow binary operator and/or avoid concatenation and extraction.
+static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
+ // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
+ // some of these bailouts with other transforms.
+
+ if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
+ return V;
+
+ // The extract index must be a constant, so we can map it to a concat operand.
+ auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+ if (!ExtractIndexC)
+ return SDValue();
+
+ // We are looking for an optionally bitcasted wide vector binary operator
+ // feeding an extract subvector.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
+ unsigned BOpcode = BinOp.getOpcode();
+ if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
+ return SDValue();
+
+ // The binop must be a vector type, so we can extract some fraction of it.
+ EVT WideBVT = BinOp.getValueType();
+ if (!WideBVT.isVector())
+ return SDValue();
+
+ EVT VT = Extract->getValueType(0);
+ unsigned ExtractIndex = ExtractIndexC->getZExtValue();
+ assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
+ "Extract index is not a multiple of the vector length.");
+
+ // Bail out if this is not a proper multiple width extraction.
+ unsigned WideWidth = WideBVT.getSizeInBits();
+ unsigned NarrowWidth = VT.getSizeInBits();
+ if (WideWidth % NarrowWidth != 0)
+ return SDValue();
+
+ // Bail out if we are extracting a fraction of a single operation. This can
+ // occur because we potentially looked through a bitcast of the binop.
+ unsigned NarrowingRatio = WideWidth / NarrowWidth;
+ unsigned WideNumElts = WideBVT.getVectorNumElements();
+ if (WideNumElts % NarrowingRatio != 0)
+ return SDValue();
+
+ // Bail out if the target does not support a narrower version of the binop.
+ EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
+ WideNumElts / NarrowingRatio);
+ if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
+ return SDValue();
+
+ // If extraction is cheap, we don't need to look at the binop operands
+ // for concat ops. The narrow binop alone makes this transform profitable.
+ // We can't just reuse the original extract index operand because we may have
+ // bitcasted.
+ unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
+ unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
+ EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
+ if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
+ BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
+ // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
+ SDLoc DL(Extract);
+ SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), NewExtIndex);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), NewExtIndex);
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
+ BinOp.getNode()->getFlags());
+ return DAG.getBitcast(VT, NarrowBinOp);
+ }
+
+ // Only handle the case where we are doubling and then halving. A larger ratio
+ // may require more than two narrow binops to replace the wide binop.
+ if (NarrowingRatio != 2)
+ return SDValue();
+
+ // TODO: The motivating case for this transform is an x86 AVX1 target. That
+ // target has temptingly almost legal versions of bitwise logic ops in 256-bit
+ // flavors, but no other 256-bit integer support. This could be extended to
+ // handle any binop, but that may require fixing/adding other folds to avoid
+ // codegen regressions.
+ if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+ return SDValue();
+
+ // We need at least one concatenation operation of a binop operand to make
+ // this transform worthwhile. The concat must double the input vector sizes.
+ auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
+ return V.getOperand(ConcatOpNum);
+ return SDValue();
+ };
+ SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
+ SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
+
+ if (SubVecL || SubVecR) {
+ // If a binop operand was not the result of a concat, we must extract a
+ // half-sized operand for our new narrow binop:
+ // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
+ // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
+ // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
+ SDLoc DL(Extract);
+ SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), IndexC);
+
+ SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), IndexC);
+
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+ return DAG.getBitcast(VT, NarrowBinOp);
+ }
+
+ return SDValue();
+}
+
+/// If we are extracting a subvector from a wide vector load, convert to a
+/// narrow load to eliminate the extraction:
+/// (extract_subvector (load wide vector)) --> (load narrow vector)
+static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
+ // TODO: Add support for big-endian. The offset calculation must be adjusted.
+ if (DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
+ auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+  if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || !ExtIdx)
+ return SDValue();
+
+ // Allow targets to opt-out.
+ EVT VT = Extract->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
+ return SDValue();
+
+ // The narrow load will be offset from the base address of the old load if
+ // we are extracting from something besides index 0 (little-endian).
+ SDLoc DL(Extract);
+ SDValue BaseAddr = Ld->getOperand(1);
+ unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
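+  // For example, extracting a v2f64 at index 2 from a wider v8f64 load reads
+  // from byte offset 2 * 8 = 16 past the original base address.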
+
+ // TODO: Use "BaseIndexOffset" to make this more effective.
+ SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
+ VT.getStoreSize());
+ SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
+ DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
+ return NewLd;
+}
+
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
+ EVT NVT = N->getValueType(0);
+ SDValue V = N->getOperand(0);
+
+ // Extract from UNDEF is UNDEF.
+ if (V.isUndef())
+ return DAG.getUNDEF(NVT);
+
+ if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
+ if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
+ return NarrowLoad;
+
+ // Combine an extract of an extract into a single extract_subvector.
+ // ext (ext X, C), 0 --> ext X, C
+ SDValue Index = N->getOperand(1);
+ if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
+ if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
+ V.getConstantOperandVal(1)) &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
+ V.getOperand(1));
+ }
+ }
+
+ // Try to move vector bitcast after extract_subv by scaling extraction index:
+ // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
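+  // For example:
+  //   v2i32 extract_subv (v4i32 bitcast (v8i16 X)), 2
+  //     --> v2i32 bitcast (v4i16 extract_subv X, 4)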
+ if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
+ V.getOperand(0).getValueType().isVector()) {
+ SDValue SrcOp = V.getOperand(0);
+ EVT SrcVT = SrcOp.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ unsigned DestNumElts = V.getValueType().getVectorNumElements();
+ if ((SrcNumElts % DestNumElts) == 0) {
+ unsigned SrcDestRatio = SrcNumElts / DestNumElts;
+ unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
+ EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NewExtNumElts);
+ if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
+ SDLoc DL(N);
+ SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+ SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ }
+ // TODO - handle (DestNumElts % SrcNumElts) == 0
+ }
+
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked as 'concat' assumes all inputs of the same
+ // type.
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
+ V.getOperand(0).getValueType() == NVT) {
+ unsigned Idx = N->getConstantOperandVal(1);
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
+ V = peekThroughBitcasts(V);
+
+  // If the input is a build vector, try to make a smaller build vector.
+ if (V.getOpcode() == ISD::BUILD_VECTOR) {
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
+ EVT InVT = V.getValueType();
+ unsigned ExtractSize = NVT.getSizeInBits();
+ unsigned EltSize = InVT.getScalarSizeInBits();
+ // Only do this if we won't split any elements.
+ if (ExtractSize % EltSize == 0) {
+ unsigned NumElems = ExtractSize / EltSize;
+ EVT EltVT = InVT.getVectorElementType();
+ EVT ExtractVT = NumElems == 1 ? EltVT
+ : EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElems);
+ if ((Level < AfterLegalizeDAG ||
+ (NumElems == 1 ||
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
+ (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
+ unsigned IdxVal = IdxC->getZExtValue();
+ IdxVal *= NVT.getScalarSizeInBits();
+ IdxVal /= EltSize;
+
+ if (NumElems == 1) {
+ SDValue Src = V->getOperand(IdxVal);
+ if (EltVT != Src.getValueType())
+            Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
+ return DAG.getBitcast(NVT, Src);
+ }
+
+ // Extract the pieces from the original build_vector.
+ SDValue BuildVec = DAG.getBuildVector(
+ ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
+ return DAG.getBitcast(NVT, BuildVec);
+ }
+ }
+ }
+ }
+
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
+ // Handle only simple case where vector being inserted and vector
+ // being extracted are of same size.
+ EVT SmallVT = V.getOperand(1).getValueType();
+ if (!NVT.bitsEq(SmallVT))
+ return SDValue();
+
+ // Only handle cases where both indexes are constants.
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
+ auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
+ if (InsIdx && ExtIdx) {
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal or bit offsets are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
+ ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
+ return DAG.getBitcast(NVT, V.getOperand(1));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
+ DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
+ Index);
+ }
+ }
+
+ if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
+ return NarrowBOp;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
+/// followed by concatenation. Narrow vector ops may have better performance
+/// than wide ops, and this can unlock further narrowing of other vector ops.
+/// Targets can invert this transform later if it is not profitable.
+static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
+ if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
+ N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
+ !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
+ return SDValue();
+
+ // Split the wide shuffle mask into halves. Any mask element that is accessing
+ // operand 1 is offset down to account for narrowing of the vectors.
+ ArrayRef<int> Mask = Shuf->getMask();
+ EVT VT = Shuf->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ SmallVector<int, 16> Mask0(HalfNumElts, -1);
+ SmallVector<int, 16> Mask1(HalfNumElts, -1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] == -1)
+ continue;
+ int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
+ if (i < HalfNumElts)
+ Mask0[i] = M;
+ else
+ Mask1[i - HalfNumElts] = M;
+ }
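+  // For example, with NumElts = 8 and a wide mask of <0,1,8,9,2,3,10,11>,
+  // the halves are Mask0 = <0,1,4,5> and Mask1 = <2,3,6,7>: wide elements
+  // 8-11 come from Y, which occupies indices 4-7 in the narrow shuffles.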
+
+ // Ask the target if this is a valid transform.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ HalfNumElts);
+ if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
+ !TLI.isShuffleMaskLegal(Mask1, HalfVT))
+ return SDValue();
+
+ // shuffle (concat X, undef), (concat Y, undef), Mask -->
+ // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
+ SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
+ SDLoc DL(Shuf);
+ SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
+ SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
+}
+
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
+// or turn a shuffle of a single concat into simpler shuffle then concat.
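+// For example:
+//   v4i32 shuffle<2,3,4,5> (concat (v2i32 A), (v2i32 B)),
+//                          (concat (v2i32 C), (v2i32 D))
+//     --> concat_vectors B, C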
+static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> Mask = SVN->getMask();
+
+ SmallVector<SDValue, 4> Ops;
+ EVT ConcatVT = N0.getOperand(0).getValueType();
+ unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
+ unsigned NumConcats = NumElts / NumElemsPerConcat;
+
+ auto IsUndefMaskElt = [](int i) { return i == -1; };
+
+ // Special case: shuffle(concat(A,B)) can be more efficiently represented
+ // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
+ // half vector elements.
+ if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
+ llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
+ IsUndefMaskElt)) {
+ N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
+ N0.getOperand(1),
+ Mask.slice(0, NumElemsPerConcat));
+ N1 = DAG.getUNDEF(ConcatVT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
+ }
+
+  // Look at every vector that's inserted. We're looking for exact
+  // subvector-sized copies from a concatenated vector.
+ for (unsigned I = 0; I != NumConcats; ++I) {
+ unsigned Begin = I * NumElemsPerConcat;
+ ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
+
+ // Make sure we're dealing with a copy.
+ if (llvm::all_of(SubMask, IsUndefMaskElt)) {
+ Ops.push_back(DAG.getUNDEF(ConcatVT));
+ continue;
+ }
+
+ int OpIdx = -1;
+ for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
+ if (IsUndefMaskElt(SubMask[i]))
+ continue;
+ if ((SubMask[i] % (int)NumElemsPerConcat) != i)
+ return SDValue();
+ int EltOpIdx = SubMask[i] / NumElemsPerConcat;
+ if (0 <= OpIdx && EltOpIdx != OpIdx)
+ return SDValue();
+ OpIdx = EltOpIdx;
+ }
+ assert(0 <= OpIdx && "Unknown concat_vectors op");
+
+ if (OpIdx < (int)N0.getNumOperands())
+ Ops.push_back(N0.getOperand(OpIdx));
+ else
+ Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+}
+
+// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+//
+// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
+// a simplification in some sense, but it isn't appropriate in general: some
+// BUILD_VECTORs are substantially cheaper than others. The general case
+// of a BUILD_VECTOR requires inserting each element individually (or
+// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
+// all constants is a single constant pool load. A BUILD_VECTOR where each
+// element is identical is a splat. A BUILD_VECTOR where most of the operands
+// are undef lowers to a small number of element insertions.
+//
+// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
+// We don't fold shuffles where one side is a non-zero constant, and we don't
+// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
+// non-constant operands. This seems to work out reasonably well in practice.
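+//
+// For example, when the one-use and constant heuristics above are satisfied:
+//   v4i32 shuffle<0,4,1,5> (build_vector a,b,c,d), (build_vector e,f,g,h)
+//     --> build_vector a,e,b,f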
+static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = SVN->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = SVN->getOperand(0);
+ SDValue N1 = SVN->getOperand(1);
+
+ if (!N0->hasOneUse())
+ return SDValue();
+
+  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
+  // discussed above.
+ if (!N1.isUndef()) {
+ if (!N1->hasOneUse())
+ return SDValue();
+
+ bool N0AnyConst = isAnyConstantBuildVector(N0);
+ bool N1AnyConst = isAnyConstantBuildVector(N1);
+ if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
+ return SDValue();
+ if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
+ return SDValue();
+ }
+
+ // If both inputs are splats of the same value then we can safely merge this
+ // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
+ bool IsSplat = false;
+ auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ if (BV0 && BV1)
+ if (SDValue Splat0 = BV0->getSplatValue())
+ IsSplat = (Splat0 == BV1->getSplatValue());
+
+ SmallVector<SDValue, 8> Ops;
+ SmallSet<SDValue, 16> DuplicateOps;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M < (int)NumElts ? M : M - NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ SDValue Op0 = S.getOperand(0);
+ Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
+ } else {
+ // Operand can't be combined - bail out.
+ return SDValue();
+ }
+ }
+
+ // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
+ // generating a splat; semantically, this is fine, but it's likely to
+ // generate low-quality code if the target can't reconstruct an appropriate
+ // shuffle.
+ if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
+ if (!IsSplat && !DuplicateOps.insert(Op).second)
+ return SDValue();
+
+ Ops.push_back(Op);
+ }
+
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
+ return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
+}
+
+// Match shuffles that can be converted to any_vector_extend_in_reg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
+// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
+static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue N0 = SVN->getOperand(0);
+
+ // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
+ auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+  // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
+  // power-of-2 extensions, as they are the most likely.
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+ // Check for non power of 2 vector sizes
+ if (NumElts % Scale != 0)
+ continue;
+ if (!isAnyExtend(Scale))
+ continue;
+
+ EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
+ // Never create an illegal type. Only create unsupported operations if we
+ // are pre-legalization.
+ if (TLI.isTypeLegal(OutVT))
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
+ return DAG.getBitcast(VT,
+ DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
+ SDLoc(SVN), OutVT, N0));
+ }
+
+ return SDValue();
+}
+
+// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
+// each source element of a large type into the lowest elements of a smaller
+// destination type. This is often generated during legalization.
+// If the source node itself was a '*_extend_vector_inreg' node then we should
+// then be able to remove it.
+static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
+
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
+ Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
+ Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ ArrayRef<int> Mask = SVN->getMask();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+ unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
+
+ if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
+ return SDValue();
+ unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
+
+  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
+ // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
+ // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
+ auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ // At the moment we just handle the case where we've truncated back to the
+ // same size as before the extension.
+  // TODO: handle more extension/truncation cases as they arise.
+ if (EltSizeInBits != ExtSrcSizeInBits)
+ return SDValue();
+
+ // We can remove *extend_vector_inreg only if the truncation happens at
+ // the same scale as the extension.
+ if (isTruncate(ExtScale))
+ return DAG.getBitcast(VT, N00);
+
+ return SDValue();
+}
+
+// Combine shuffles of splat-shuffles of the form:
+// shuffle (shuffle V, undef, splat-mask), undef, M
+// If splat-mask contains undef elements, we need to be careful about
+// introducing undefs in the folded mask which are not the result of composing
+// the masks of the shuffles.
+static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ if (!Shuf->getOperand(1).isUndef())
+ return SDValue();
+ auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Splat || !Splat->isSplat())
+ return SDValue();
+
+ ArrayRef<int> ShufMask = Shuf->getMask();
+ ArrayRef<int> SplatMask = Splat->getMask();
+ assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
+
+ // Prefer simplifying to the splat-shuffle, if possible. This is legal if
+ // every undef mask element in the splat-shuffle has a corresponding undef
+ // element in the user-shuffle's mask or if the composition of mask elements
+ // would result in undef.
+ // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
+ // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
+  // In this case it is not legal to simplify to the splat-shuffle because we
+  // may be exposing to the users of the shuffle an undef element at index 1
+  // which was not there before the combine.
+ // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
+ // In this case the composition of masks yields SplatMask, so it's ok to
+ // simplify to the splat-shuffle.
+ // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
+ // In this case the composed mask includes all undef elements of SplatMask
+ // and in addition sets element zero to undef. It is safe to simplify to
+ // the splat-shuffle.
+ auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
+ ArrayRef<int> SplatMask) {
+ for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
+ if (UserMask[i] != -1 && SplatMask[i] == -1 &&
+ SplatMask[UserMask[i]] != -1)
+ return false;
+ return true;
+ };
+ if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
+ return Shuf->getOperand(0);
+
+ // Create a new shuffle with a mask that is composed of the two shuffles'
+ // masks.
+ SmallVector<int, 32> NewMask;
+ for (int Idx : ShufMask)
+ NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
+
+ return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
+ Splat->getOperand(0), Splat->getOperand(1),
+ NewMask);
+}
+
+/// If the shuffle mask is taking exactly one element from the first vector
+/// operand and passing through all other elements from the second vector
+/// operand, return the index of the mask element that is choosing an element
+/// from the first operand. Otherwise, return -1.
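+///
+/// For example, with 4-element operands, Mask = <4,5,2,7> passes through
+/// elements 0, 1 and 3 of operand 1 and takes element 2 from operand 0, so
+/// this returns 2.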
+static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
+ int MaskSize = Mask.size();
+ int EltFromOp0 = -1;
+ // TODO: This does not match if there are undef elements in the shuffle mask.
+ // Should we ignore undefs in the shuffle mask instead? The trade-off is
+ // removing an instruction (a shuffle), but losing the knowledge that some
+ // vector lanes are not needed.
+ for (int i = 0; i != MaskSize; ++i) {
+ if (Mask[i] >= 0 && Mask[i] < MaskSize) {
+ // We're looking for a shuffle of exactly one element from operand 0.
+ if (EltFromOp0 != -1)
+ return -1;
+ EltFromOp0 = i;
+ } else if (Mask[i] != i + MaskSize) {
+ // Nothing from operand 1 can change lanes.
+ return -1;
+ }
+ }
+ return EltFromOp0;
+}
+
+/// If a shuffle inserts exactly one element from a source vector operand into
+/// another vector operand and we can access the specified element as a scalar,
+/// then we can eliminate the shuffle.
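+///
+/// For example:
+///   v4i32 shuffle<0,5,6,7> (insertelt V1, x, 0), V2
+///     --> insertelt V2, x, 0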
+static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ // First, check if we are taking one element of a vector and shuffling that
+ // element into another vector.
+ ArrayRef<int> Mask = Shuf->getMask();
+ SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
+ SDValue Op0 = Shuf->getOperand(0);
+ SDValue Op1 = Shuf->getOperand(1);
+ int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
+ if (ShufOp0Index == -1) {
+ // Commute mask and check again.
+ ShuffleVectorSDNode::commuteMask(CommutedMask);
+ ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
+ if (ShufOp0Index == -1)
+ return SDValue();
+ // Commute operands to match the commuted shuffle mask.
+ std::swap(Op0, Op1);
+ Mask = CommutedMask;
+ }
+
+ // The shuffle inserts exactly one element from operand 0 into operand 1.
+ // Now see if we can access that element as a scalar via a real insert element
+ // instruction.
+ // TODO: We can try harder to locate the element as a scalar. Examples: it
+ // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
+ assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
+ "Shuffle mask value must be from operand 0");
+ if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ return SDValue();
+
+ auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
+ if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
+ return SDValue();
+
+ // There's an existing insertelement with constant insertion index, so we
+ // don't need to check the legality/profitability of a replacement operation
+ // that differs at most in the constant value. The target should be able to
+ // lower any of those in a similar way. If not, legalization will expand this
+ // to a scalar-to-vector plus shuffle.
+ //
+ // Note that the shuffle may move the scalar from the position that the insert
+ // element used. Therefore, our new insert element occurs at the shuffle's
+ // mask index value, not the insert's index value.
+ // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
+ SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
+ Op0.getOperand(2).getValueType());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
+ Op1, Op0.getOperand(1), NewInsIndex);
+}
+
+/// If we have a unary shuffle of a shuffle, see if it can be folded away
+/// completely. This has the potential to lose undef knowledge because the first
+/// shuffle may not have an undef mask element where the second one does. So
+/// only call this after doing simplifications based on demanded elements.
+static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
+ // shuf (shuf0 X, Y, Mask0), undef, Mask
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Shuf0 || !Shuf->getOperand(1).isUndef())
+ return SDValue();
+
+ ArrayRef<int> Mask = Shuf->getMask();
+ ArrayRef<int> Mask0 = Shuf0->getMask();
+ for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Mask[i] == -1)
+ continue;
+ assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
+
+ // Is the element of the shuffle operand chosen by this shuffle the same as
+ // the element chosen by the shuffle operand itself?
+ if (Mask0[Mask[i]] != Mask0[i])
+ return SDValue();
+ }
+ // Every element of this shuffle is identical to the result of the previous
+ // shuffle, so we can replace this value.
+ return Shuf->getOperand(0);
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N0.isUndef() && N1.isUndef())
+ return DAG.getUNDEF(VT);
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N0 == N1) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) Idx -= NumElts;
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.isUndef())
+ return DAG.getCommutedVectorShuffle(*SVN);
+
+ // Remove references to rhs if it is undef
+ if (N1.isUndef()) {
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) {
+ Idx = -1;
+ Changed = true;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
+ }
+
+ if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
+ return InsElt;
+
+ // A shuffle of a single vector that is a splatted value can always be folded.
+ if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
+ return V;
+
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector.
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
+ int SplatIndex = SVN->getSplatIndex();
+ if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+ TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
+ // splat (vector_bo L, R), Index -->
+ // splat (scalar_bo (extelt L, Index), (extelt R, Index))
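+      // e.g. splat (mul L, R), 1 -->
+      //        splat (mul (extelt L, 1), (extelt R, 1))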
+ SDValue L = N0.getOperand(0), R = N0.getOperand(1);
+ SDLoc DL(N);
+ EVT EltVT = VT.getScalarType();
+ SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
+ SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
+ SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
+ SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
+ N0.getNode()->getFlags());
+ SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
+ SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
+ return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
+ }
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+ SDNode *V = N0.getNode();
+ if (V->getOpcode() == ISD::BITCAST) {
+ SDValue ConvInput = V->getOperand(0);
+ if (ConvInput.getValueType().isVector() &&
+ ConvInput.getValueType().getVectorNumElements() == NumElts)
+ V = ConvInput.getNode();
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!V->getOperand(i).isUndef()) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+
+ // Canonicalize any other splat as a build_vector.
+ SDValue Splatted = V->getOperand(SplatIndex);
+ SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+ SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (V->getValueType(0) != VT)
+ NewBV = DAG.getBitcast(VT, NewBV);
+ return NewBV;
+ }
+ }
+
+ // Simplify source operands based on shuffle mask.
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // This is intentionally placed after demanded elements simplification because
+ // it could eliminate knowledge of undef elements created by this shuffle.
+ if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
+ return ShufOp;
+
+ // Match shuffles that can be converted to any_vector_extend_in_reg.
+ if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+ return V;
+
+ // Combine "truncate_vector_in_reg" style shuffles.
+ if (SDValue V = combineTruncationShuffle(SVN, DAG))
+ return V;
+
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+ Level < AfterLegalizeVectorOps &&
+ (N1.isUndef() ||
+ (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
+ if (SDValue V = partitionShuffleOfConcats(N, DAG))
+ return V;
+ }
+
+ // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+ // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
+ if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
+ return Res;
+
+ // If this shuffle only has a single input that is a bitcasted shuffle,
+ // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
+ // back to their original types.
+ if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N1.isUndef() && Level < AfterLegalizeVectorOps &&
+ TLI.isTypeLegal(VT)) {
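+    // Scaling repeats each mask element Scale times at the finer
+    // granularity, e.g. scaling mask <1,3> by 2 yields <2,3,6,7>.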
+ auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
+ if (Scale == 1)
+ return SmallVector<int, 8>(Mask.begin(), Mask.end());
+
+ SmallVector<int, 8> NewMask;
+ for (int M : Mask)
+ for (int s = 0; s != Scale; ++s)
+ NewMask.push_back(M < 0 ? -1 : Scale * M + s);
+ return NewMask;
+ };
+
+ SDValue BC0 = peekThroughOneUseBitcasts(N0);
+ if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
+ EVT SVT = VT.getScalarType();
+ EVT InnerVT = BC0->getValueType(0);
+ EVT InnerSVT = InnerVT.getScalarType();
+
+ // Determine which shuffle works with the smaller scalar type.
+ EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
+ EVT ScaleSVT = ScaleVT.getScalarType();
+
+ if (TLI.isTypeLegal(ScaleVT) &&
+ 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
+ 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
+ int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+ int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+
+ // Scale the shuffle masks to the smaller scalar type.
+ ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
+ SmallVector<int, 8> InnerMask =
+ ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
+ SmallVector<int, 8> OuterMask =
+ ScaleShuffleMask(SVN->getMask(), OuterScale);
+
+ // Merge the shuffle masks.
+ SmallVector<int, 8> NewMask;
+ for (int M : OuterMask)
+ NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
+
+ // Test for shuffle mask legality over both commutations.
+ SDValue SV0 = BC0->getOperand(0);
+ SDValue SV1 = BC0->getOperand(1);
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ }
+
+ if (LegalMask) {
+ SV0 = DAG.getBitcast(ScaleVT, SV0);
+ SV1 = DAG.getBitcast(ScaleVT, SV1);
+ return DAG.getBitcast(
+ VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
+ }
+ }
+ }
+ }
+
+ // Canonicalize shuffles according to rules:
+ // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+ // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+ // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ TLI.isTypeLegal(VT)) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(N1->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = N1->getOperand(0);
+ SDValue SV1 = N1->getOperand(1);
+ bool HasSameOp0 = N0 == SV0;
+ bool IsSV1Undef = SV1.isUndef();
+ if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+ // Commute the operands of this shuffle so that next rule
+ // will trigger.
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Try to fold according to rules:
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // Don't try to fold shuffles with illegal type.
+ // Only fold if this shuffle is the only user of the other shuffle.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+ Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // Don't try to fold splats; they're likely to simplify somehow, or they
+ // might be free.
+ if (OtherSV->isSplat())
+ return SDValue();
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0, SV1;
+ SmallVector<int, 4> Mask;
+ // Compute the combined shuffle mask for a shuffle with SV0 as the first
+ // operand, and SV1 as the second operand.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ SDValue CurrentVec;
+ if (Idx < (int)NumElts) {
+ // This shuffle index refers to the inner shuffle N0. Lookup the inner
+ // shuffle mask to identify which vector is actually referenced.
+ Idx = OtherSV->getMaskElt(Idx);
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
+ : OtherSV->getOperand(1);
+ } else {
+ // This shuffle index references an element within N1.
+ CurrentVec = N1;
+ }
+
+ // Simple case where 'CurrentVec' is UNDEF.
+ if (CurrentVec.isUndef()) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // Canonicalize the shuffle index. We don't know yet if CurrentVec
+ // will be the first or second operand of the combined shuffle.
+ Idx = Idx % NumElts;
+ if (!SV0.getNode() || SV0 == CurrentVec) {
+ // Ok. CurrentVec is the left hand side.
+ // Update the mask accordingly.
+ SV0 = CurrentVec;
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ // Bail out if we cannot convert the shuffle pair into a single shuffle.
+ if (SV1.getNode() && SV1 != CurrentVec)
+ return SDValue();
+
+ // Ok. CurrentVec is the right hand side.
+ // Update the mask accordingly.
+ SV1 = CurrentVec;
+ Mask.push_back(Idx + NumElts);
+ }
+
+    // Check if all indices in Mask are Undef. If so, propagate Undef.
+ bool isUndefMask = true;
+ for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+ isUndefMask &= Mask[i] < 0;
+
+ if (isUndefMask)
+ return DAG.getUNDEF(VT);
+
+ if (!SV0.getNode())
+ SV0 = DAG.getUNDEF(VT);
+ if (!SV1.getNode())
+ SV1 = DAG.getUNDEF(VT);
+
+ // Avoid introducing shuffles with illegal mask.
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
+ }
+
+ if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
+ return V;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
+ SDValue InVal = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
+ // with a VECTOR_SHUFFLE and possible truncate.
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue InVec = InVal->getOperand(0);
+ SDValue EltNo = InVal->getOperand(1);
+ auto InVecT = InVec.getValueType();
+ if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
+ SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
+ int Elt = C0->getZExtValue();
+ NewMask[0] = Elt;
+      // If we have an implicit truncate, do the truncate here as long as
+      // it's legal; if it's not legal, we give up on this combine, since the
+      // shuffle fold below requires matching scalar types.
+ if (VT.getScalarType() != InVal.getValueType() &&
+ InVal.getValueType().isScalarInteger() &&
+ isTypeLegal(VT.getScalarType())) {
+ SDValue Val =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
+ }
+ if (VT.getScalarType() == InVecT.getScalarType() &&
+ VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
+ DAG.getUNDEF(InVecT), NewMask, DAG);
+ if (LegalShuffle) {
+ // If the initial vector is the correct size this shuffle is a
+ // valid result.
+ if (VT == InVecT)
+ return LegalShuffle;
+ // If not we must truncate the vector.
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
+ EVT SubVT =
+ EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
+ VT.getVectorNumElements());
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
+ LegalShuffle, ZeroIdx);
+ }
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ // If inserting an UNDEF, just return the original vector.
+ if (N1.isUndef())
+ return N0;
+
+ // If this is an insert of an extracted vector into an undef vector, we can
+ // just use the input to the extract.
+ if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
+ return N1.getOperand(0);
+
+ // If we are inserting a bitcast value into an undef, with the same
+ // number of elements, just use the bitcast input of the extract.
+ // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
+ // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
+ if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
+ N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOperand(0).getOperand(1) == N2 &&
+ N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
+ VT.getVectorNumElements() &&
+ N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
+ VT.getSizeInBits()) {
+ return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
+ }
+
+  // If both N0 and N1 are bitcast values on which insert_subvector
+  // would make sense, pull the bitcast through.
+ // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
+ // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
+ if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
+ SDValue CN0 = N0.getOperand(0);
+ SDValue CN1 = N1.getOperand(0);
+ EVT CN0VT = CN0.getValueType();
+ EVT CN1VT = CN1.getValueType();
+ if (CN0VT.isVector() && CN1VT.isVector() &&
+ CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
+ CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
+ SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
+ CN0.getValueType(), CN0, CN1, N2);
+ return DAG.getBitcast(VT, NewINSERT);
+ }
+ }
+
+ // Combine INSERT_SUBVECTORs where we are inserting to the same index.
+ // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
+ // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N0.getOperand(1).getValueType() == N1.getValueType() &&
+ N0.getOperand(2) == N2)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
+ N1, N2);
+
+ // Eliminate an intermediate insert into an undef vector:
+ // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
+ // insert_subvector undef, X, N2
+ if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
+ N1.getOperand(1), N2);
+
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+
+ uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Push subvector bitcasts to the output, adjusting the index as we go.
+ // insert_subvector(bitcast(v), bitcast(s), c1)
+ // -> bitcast(insert_subvector(v, s, c2))
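+  // e.g. inserting a (bitcast v4i32 s) as a v2i64 subvector at index 2 of a
+  // v4i64 vector becomes an insert of s at index 4 of a v8i32 vector.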
+ if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
+ N1.getOpcode() == ISD::BITCAST) {
+ SDValue N0Src = peekThroughBitcasts(N0);
+ SDValue N1Src = peekThroughBitcasts(N1);
+ EVT N0SrcSVT = N0Src.getValueType().getScalarType();
+ EVT N1SrcSVT = N1Src.getValueType().getScalarType();
+ if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
+ N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
+ EVT NewVT;
+ SDLoc DL(N);
+ SDValue NewIdx;
+ MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
+ unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
+ NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
+ } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
+ unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
+ if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
+ NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
+ }
+ }
+ if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
+ SDValue Res = DAG.getBitcast(NewVT, N0Src);
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+ }
+
+  // Canonicalize insert_subvector dag nodes.
+  // Example (when Idx1 < Idx0):
+  // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
+  // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0)
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
+ N1.getValueType() == N0.getOperand(1).getValueType() &&
+ isa<ConstantSDNode>(N0.getOperand(2))) {
+ unsigned OtherIdx = N0.getConstantOperandVal(2);
+ if (InsIdx < OtherIdx) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
+ N0.getOperand(0), N1, N2);
+ AddToWorklist(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
+ VT, NewOp, N0.getOperand(1), N0.getOperand(2));
+ }
+ }
+
+ // If the input vector is a concatenation, and the insert replaces
+ // one of the pieces, we can optimize into a single concat_vectors.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
+ N0.getOperand(0).getValueType() == N1.getValueType()) {
+ unsigned Factor = N1.getValueType().getVectorNumElements();
+
+ SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
+ Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+ }
+
+ // Simplify source operands based on insertion.
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (fp_to_fp16 (fp16_to_fp op)) -> op
+ if (N0->getOpcode() == ISD::FP16_TO_FP)
+ return N0->getOperand(0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
+ if (N0->getOpcode() == ISD::AND) {
+ ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
+ if (AndConst && AndConst->getAPIntValue() == 0xffff) {
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
+ N0.getOperand(0));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N0.getValueType();
+ unsigned Opcode = N->getOpcode();
+
+ // VECREDUCE over 1-element vector is just an extract.
+ if (VT.getVectorNumElements() == 1) {
+ SDLoc dl(N);
+ SDValue Res = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
+ return Res;
+ }
+
+  // On a boolean vector an and/or reduction is the same as a umin/umax
+ // reduction. Convert them if the latter is legal while the former isn't.
+ if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
+ unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
+ ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
+ if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
+ TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
+ DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
+ return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
+ }
+
+ return SDValue();
+}
+
+/// Returns a vector_shuffle if it is able to transform an AND to a
+/// vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
+
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = peekThroughBitcasts(N->getOperand(1));
+ SDLoc DL(N);
+
+ // Make sure we're not running after operation legalization where it
+ // may have custom lowered the vector shuffles.
+ if (LegalOperations)
+ return SDValue();
+
+ if (RHS.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ EVT RVT = RHS.getValueType();
+ unsigned NumElts = RHS.getNumOperands();
+
+ // Attempt to create a valid clear mask, splitting the mask into
+ // sub elements and checking to see if each is
+ // all zeros or all ones - suitable for shuffle masking.
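+  // e.g. on a little-endian target, (and v2i64 X, <0x00000000ffffffff, -1>)
+  // can be cleared at i32 granularity with the v4i32 shuffle mask <0,5,2,3>.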
+ auto BuildClearMask = [&](int Split) {
+ int NumSubElts = NumElts * Split;
+ int NumSubBits = RVT.getScalarSizeInBits() / Split;
+
+ SmallVector<int, 8> Indices;
+ for (int i = 0; i != NumSubElts; ++i) {
+ int EltIdx = i / Split;
+ int SubIdx = i % Split;
+ SDValue Elt = RHS.getOperand(EltIdx);
+ if (Elt.isUndef()) {
+ Indices.push_back(-1);
+ continue;
+ }
+
+ APInt Bits;
+ if (isa<ConstantSDNode>(Elt))
+ Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
+ else if (isa<ConstantFPSDNode>(Elt))
+ Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
+ else
+ return SDValue();
+
+ // Extract the sub element from the constant bit mask.
+ if (DAG.getDataLayout().isBigEndian()) {
+ Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
+ } else {
+ Bits.lshrInPlace(SubIdx * NumSubBits);
+ }
+
+ if (Split > 1)
+ Bits = Bits.trunc(NumSubBits);
+
+ if (Bits.isAllOnesValue())
+ Indices.push_back(i);
+ else if (Bits == 0)
+ Indices.push_back(i + NumSubElts);
+ else
+ return SDValue();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
+ EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
+ if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
+ return SDValue();
+
+ SDValue Zero = DAG.getConstant(0, DL, ClearVT);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
+ DAG.getBitcast(ClearVT, LHS),
+ Zero, Indices));
+ };
+
+ // Determine maximum split level (byte level masking).
+ int MaxSplit = 1;
+ if (RVT.getScalarSizeInBits() % 8 == 0)
+ MaxSplit = RVT.getScalarSizeInBits() / 8;
+
+ for (int Split = 1; Split <= MaxSplit; ++Split)
+ if (RVT.getScalarSizeInBits() % Split == 0)
+ if (SDValue S = BuildClearMask(Split))
+ return S;
+
+ return SDValue();
+}
+
+/// If a vector binop is performed on splat values, it may be profitable to
+/// extract, scalarize, and insert/splat.
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // TODO: Remove/replace the extract cost check? If the elements are available
+ // as scalars, then there may be no extract cost. Should we ask if
+ // inserting a scalar back into a vector is cheap instead?
+ int Index0, Index1;
+ SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
+ SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+ if (!Src0 || !Src1 || Index0 != Index1 ||
+ Src0.getValueType().getVectorElementType() != EltVT ||
+ Src1.getValueType().getVectorElementType() != EltVT ||
+ !TLI.isExtractVecEltCheap(VT, Index0) ||
+ !TLI.isOperationLegalOrCustom(Opcode, EltVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue IndexC =
+ DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
+ SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
+
+ // If all lanes but 1 are undefined, no need to splat the scalar result.
+ // TODO: Keep track of undefs and use that info in the general case.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
+ count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
+ count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
+ // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
+ // build_vec ..undef, (bo X, Y), undef...
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
+ Ops[Index0] = ScalarBO;
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+
+ // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
+ return DAG.getBuildVector(VT, DL, Ops);
+}
+
+/// Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVBinOp only works on vectors!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Ops[] = {LHS, RHS};
+ EVT VT = N->getValueType(0);
+ unsigned Opcode = N->getOpcode();
+
+ // See if we can constant fold the vector operation.
+ if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
+ Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ return Fold;
+
+ // Move unary shuffles with identical masks after a vector binop:
+ // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
+ // --> shuffle (VBinOp A, B), Undef, Mask
+ // This does not require type legality checks because we are creating the
+ // same types of operations that are in the original sequence. We do have to
+  // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
+ // though. This code is adapted from the identical transform in instcombine.
+ if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
+ Opcode != ISD::UREM && Opcode != ISD::SREM &&
+ Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
+ auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
+ if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
+ LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
+ (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
+ SDLoc DL(N);
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
+ RHS.getOperand(0), N->getFlags());
+ SDValue UndefV = LHS.getOperand(1);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
+ }
+ }
+
+ // The following pattern is likely to emerge with vector reduction ops. Moving
+ // the binary operation ahead of insertion may allow using a narrower vector
+ // instruction that has better performance than the wide version of the op:
+ // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
+ if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
+ RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
+ LHS.getOperand(2) == RHS.getOperand(2) &&
+ (LHS.hasOneUse() || RHS.hasOneUse())) {
+ SDValue X = LHS.getOperand(1);
+ SDValue Y = RHS.getOperand(1);
+ SDValue Z = LHS.getOperand(2);
+ EVT NarrowVT = X.getValueType();
+ if (NarrowVT == Y.getValueType() &&
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ // (binop undef, undef) may not return undef, so compute that result.
+ SDLoc DL(N);
+ SDValue VecC =
+ DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
+ SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
+ }
+ }
+
+ // Make sure all but the first op are undef or constant.
+ auto ConcatWithConstantOrUndef = [](SDValue Concat) {
+ return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
+ std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
+ [](const SDValue &Op) {
+ return Op.isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
+ });
+ };
+
+ // The following pattern is likely to emerge with vector reduction ops. Moving
+ // the binary operation ahead of the concat may allow using a narrower vector
+ // instruction that has better performance than the wide version of the op:
+ // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
+ // concat (VBinOp X, Y), VecC
+ if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
+ (LHS.hasOneUse() || RHS.hasOneUse())) {
+ EVT NarrowVT = LHS.getOperand(0).getValueType();
+ if (NarrowVT == RHS.getOperand(0).getValueType() &&
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ SDLoc DL(N);
+ unsigned NumOperands = LHS.getNumOperands();
+ SmallVector<SDValue, 4> ConcatOps;
+ for (unsigned i = 0; i != NumOperands; ++i) {
+        // This will constant fold for operands 1 and up.
+ ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
+ RHS.getOperand(i)));
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ }
+ }
+
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+ return V;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2) {
+  assert(N0.getOpcode() == ISD::SETCC &&
+         "First argument must be a SetCC node!");
+
+ SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.getNode()) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ const SDNodeFlags Flags = N0.getNode()->getFlags();
+ SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
+ N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4), Flags);
+ AddToWorklist(SETCC.getNode());
+ SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+ SCC.getOperand(2), SCC.getOperand(3));
+ SelectNode->setFlags(Flags);
+ return SelectNode;
+ }
+
+ return SCC;
+ }
+ return SDValue();
+}
+
+/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
+/// being selected between, see if we can simplify the select. Callers of this
+/// should assume that TheSelect is deleted if this returns true. As such, they
+/// should return the appropriate thing (e.g. the node) back to the top-level of
+/// the DAG combiner loop to avoid it being looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+ SDValue RHS) {
+ // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
+ // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
+ if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
+ if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
+ // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
+ SDValue Sqrt = RHS;
+ ISD::CondCode CC;
+ SDValue CmpLHS;
+ const ConstantFPSDNode *Zero = nullptr;
+
+ if (TheSelect->getOpcode() == ISD::SELECT_CC) {
+ CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+ CmpLHS = TheSelect->getOperand(0);
+ Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+ } else {
+ // SELECT or VSELECT
+ SDValue Cmp = TheSelect->getOperand(0);
+ if (Cmp.getOpcode() == ISD::SETCC) {
+ CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+ CmpLHS = Cmp.getOperand(0);
+ Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
+ }
+ }
+ if (Zero && Zero->isZero() &&
+ Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
+ CC == ISD::SETULT || CC == ISD::SETLT)) {
+ // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
+ CombineTo(TheSelect, Sqrt);
+ return true;
+ }
+ }
+ }
+ // Cannot simplify select with vector condition
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
+
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
+ // Do not let this transformation reduce the number of volatile loads.
+ // Be conservative for atomics for the moment
+ // TODO: This does appear to be legal for unordered atomics (see D66309)
+ !LLD->isSimple() || !RLD->isSimple() ||
+ // FIXME: If either is a pre/post inc/dec load,
+ // we'd need to split out the address adjustment.
+ LLD->isIndexed() || RLD->isIndexed() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations. Since we are discarding
+ // src value info, don't do the transformation if the memory
+ // locations are not in the default address space.
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0 ||
+ // We can't produce a CMOV of a TargetFrameIndex since we won't
+ // generate the address generation required.
+ LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
+ RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
+ !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
+ LLD->getBasePtr().getValueType()))
+ return false;
+
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
+ return false;
+
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
+ // Always fail if LLD and RLD are not independent. TheSelect is a
+ // predecessor to all Nodes in question so we need not search past it.
+
+ Visited.insert(TheSelect);
+ Worklist.push_back(LLD);
+ Worklist.push_back(RLD);
+
+ if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
+ return false;
+
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ // We cannot do this optimization if any pair of {RLD, LLD} is a
+ // predecessor to {RLD, LLD, CondNode}. As we've already compared the
+ // Loads, we only need to check if CondNode is a successor to one of the
+ // loads. We can further avoid this if there's no use of their chain
+ // value.
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ Worklist.push_back(CondNode);
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
+ return false;
+
+ Addr = DAG.getSelect(SDLoc(TheSelect),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ // We cannot do this optimization if any pair of {RLD, LLD} is a
+ // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
+ // the Loads, we only need to check if CondLHS/CondRHS is a successor to
+ // one of the loads. We can further avoid this if there's no use of their
+ // chain value.
+
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+ Worklist.push_back(CondLHS);
+ Worklist.push_back(CondRHS);
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
+ return false;
+
+ Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+
+ SDValue Load;
+ // It is safe to replace the two loads if they have different alignments,
+ // but the new load must be the minimum (most restrictive) alignment of the
+ // inputs.
+ unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
+ MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
+ if (!RLD->isInvariant())
+ MMOFlags &= ~MachineMemOperand::MOInvariant;
+ if (!RLD->isDereferenceable())
+ MMOFlags &= ~MachineMemOperand::MODereferenceable;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ // FIXME: Discards pointer and AA info.
+ Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
+ LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
+ MMOFlags);
+ } else {
+ // FIXME: Discards pointer and AA info.
+ Load = DAG.getExtLoad(
+ LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
+ : LLD->getExtensionType(),
+ SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
+ MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+
+ return false;
+}
+
+/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
+/// bitwise 'and'.
+SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
+ SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ // If this is a select where the false operand is zero and the compare is a
+ // check of the sign bit, see if we can perform the "gzip trick":
+ // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
+ // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (!isNullConstant(N3) || !XType.bitsGE(AType))
+ return SDValue();
+
+ // If the comparison is testing for a positive value, we have to invert
+ // the sign bit mask, so only do that transform if the target has a bitwise
+ // 'and not' instruction (the invert is free).
+ if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
+ // (X > -1) ? A : 0
+ // (X > 0) ? X : 0 <-- This is canonical signed max.
+ if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
+ return SDValue();
+ } else if (CC == ISD::SETLT) {
+ // (X < 0) ? A : 0
+ // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
+ if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
+ return SDValue();
+ } else {
+ return SDValue();
+ }
+
+ // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
+ // constant.
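+  // e.g. for i32: select_cc setlt X, 0, 4, 0 --> and (srl X, 29), 4
+  // (the sign bit is shifted down to bit position log2(4) = 2).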
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+ auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
+ unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+}
+
+/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
+/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+/// in it. This may be a win when the constant is not otherwise available
+/// because it replaces two constant pool loads with one.
+SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
+ const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
+ return SDValue();
+
+ // If we are before legalize types, we want the other legalization to happen
+ // first (for example, to avoid messing with soft float).
+ auto *TV = dyn_cast<ConstantFPSDNode>(N2);
+ auto *FV = dyn_cast<ConstantFPSDNode>(N3);
+ EVT VT = N2.getValueType();
+ if (!TV || !FV || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // If a constant can be materialized without loads, this does not make sense.
+ if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
+ TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
+ TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
+ return SDValue();
+
+ // If both constants have multiple uses, then we won't need to do an extra
+ // load. The values are likely around in registers for other users.
+ if (!TV->hasOneUse() && !FV->hasOneUse())
+ return SDValue();
+
+ Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue()) };
+ Type *FPTy = Elts[0]->getType();
+ const DataLayout &TD = DAG.getDataLayout();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get offsets to the 0 and 1 elements of the array, so we can select between
+ // them.
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
+ SDValue Cond =
+ DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
+ AddToWorklist(Cond.getNode());
+ SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
+ AddToWorklist(CstOffset.getNode());
+ CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
+ AddToWorklist(CPIdx.getNode());
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(
+ DAG.getMachineFunction()), Alignment);
+}
+
+/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
+ EVT CmpOpVT = N0.getValueType();
+ EVT CmpResVT = getSetCCResultType(CmpOpVT);
+ EVT VT = N2.getValueType();
+ auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+ // Determine if the condition we're dealing with is constant.
+ if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
+ AddToWorklist(SCC.getNode());
+ if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
+ // fold select_cc true, x, y -> x
+ // fold select_cc false, x, y -> y
+ return !(SCCC->isNullValue()) ? N2 : N3;
+ }
+ }
+
+ if (SDValue V =
+ convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
+ return V;
+
+ if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
+ return V;
+
+  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+  // where y has a single bit set.
+  // Put plainly, we can turn the SELECT_CC into an AND when the condition can
+  // be materialized as an all-ones register. Any single bit-test can be
+  // materialized as an all-ones register with shift-left and
+  // shift-right-arith.
+ // TODO: The operation legality checks could be loosened to include "custom",
+ // but that may cause regressions for targets that do not have shift
+ // instructions.
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) &&
+ TLI.isOperationLegal(ISD::SHL, VT) &&
+ TLI.isOperationLegal(ISD::SRA, VT)) {
+ SDValue AndLHS = N0->getOperand(0);
+ auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ // Shift the tested bit over the sign bit.
+ const APInt &AndMask = ConstAndRHS->getAPIntValue();
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is either
+ // all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
+ bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
+ bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
+
+ if ((Fold || Swap) &&
+ TLI.getBooleanContents(CmpOpVT) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
+
+ if (Swap) {
+ CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
+ std::swap(N2C, N3C);
+ }
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->isOne())
+ return SDValue();
+
+ SDValue Temp, SCC;
+ // zext (setcc n0, n1)
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
+ if (VT.bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ } else {
+ SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ }
+
+ AddToWorklist(SCC.getNode());
+ AddToWorklist(Temp.getNode());
+
+ if (N2C->isOne())
+ return Temp;
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ SDLoc(Temp),
+ getShiftAmountTy(Temp.getValueType())));
+ }
+
+ // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
+ // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
+ // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
+ // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
+ // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
+ // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
+ // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
+ // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
+ if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue ValueOnZero = N2;
+ SDValue Count = N3;
+    // If the condition is NE instead of EQ, swap the operands.
+ if (CC == ISD::SETNE)
+ std::swap(ValueOnZero, Count);
+ // Check if the value on zero is a constant equal to the bits in the type.
+ if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
+ if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
+ // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
+ // legal, combine to just cttz.
+ if ((Count.getOpcode() == ISD::CTTZ ||
+ Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
+ N0 == Count.getOperand(0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
+ return DAG.getNode(ISD::CTTZ, DL, VT, N0);
+ // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
+ // legal, combine to just ctlz.
+ if ((Count.getOpcode() == ISD::CTLZ ||
+ Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
+ N0 == Count.getOperand(0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
+ return DAG.getNode(ISD::CTLZ, DL, VT, N0);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// Given an ISD::SDIV node expressing a divide by constant, return
+/// a DAG expression to select that will generate the same value by multiplying
+/// by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  // When optimizing for minimum size, we don't want to expand a div to a mul
+  // and a shift.
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
+/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
+/// DAG expression that will generate the same value by right shifting.
+SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (C->isNullValue())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
+/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
+/// expression that will generate the same value by multiplying by a magic
+/// number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  // When optimizing for minimum size, we don't want to expand a div to a mul
+  // and a shift.
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
+/// Determines the LogBase2 value for a non-null input value using the
+/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
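+/// e.g. for an i32 value of 16: ctlz(16) = 27, so LogBase2 = 31 - 27 = 4.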
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
+ EVT VT = V.getValueType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
+ SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
+ SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
+ return LogBase2;
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal, we need to find the zero of the function:
+/// F(X) = A X - 1 [which has a zero at X = 1/A]
+/// =>
+/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+/// does not require additional intermediate precision]
+/// For the last iteration, put numerator N into it to gain more precision:
+/// Result = N X_i + X_i (N - N A X_i)
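+///
+/// e.g. for A = 3 with initial estimate X_0 = 0.3:
+///   X_1 = 0.3 (2 - 3 * 0.3) = 0.33, X_2 = 0.33 (2 - 3 * 0.33) = 0.3333,
+/// converging quadratically towards 1/3.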
+SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
+ SDNodeFlags Flags) {
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getDivRefinementSteps(VT, MF);
+ if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
+ AddToWorklist(Est.getNode());
+
+ SDLoc DL(Op);
+ if (Iterations) {
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+
+ // Newton iterations: Est = Est + Est (N - Arg * Est)
+ // If this is the last iteration, also multiply by the numerator.
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue MulEst = Est;
+
+ if (i == Iterations - 1) {
+ MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
+ AddToWorklist(MulEst.getNode());
+ }
+
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT,
+ (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
+ AddToWorklist(Est.getNode());
+ }
+ } else {
+ // If no iterations are available, multiply with N.
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
+ AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+/// =>
+/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+/// As a result, we precompute A/2 prior to the iteration loop.
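+///
+/// e.g. for A = 4 (so 1/sqrt(A) = 0.5) with initial estimate X_0 = 0.6:
+///   X_1 = 0.6 (1.5 - 2 * 0.6^2) = 0.468, X_2 = 0.468 (1.5 - 2 * 0.468^2)
+///       ~= 0.497, converging towards 0.5.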
+SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
+ unsigned Iterations,
+ SDNodeFlags Flags, bool Reciprocal) {
+ EVT VT = Arg.getValueType();
+ SDLoc DL(Arg);
+ SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
+
+ // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+ }
+
+ // If non-reciprocal square root is requested, multiply the result by Arg.
+ if (!Reciprocal)
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+
+ return Est;
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+/// =>
+/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
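+///
+/// This is algebraically the same iteration as the one-constant form above,
+/// factored to use the two constants -0.5 and -3.0. e.g. for A = 4 and
+/// X_0 = 0.6: X_1 = (-0.5 * 0.6) * (4 * 0.6^2 - 3.0) = 0.468, as before.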
+SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
+ unsigned Iterations,
+ SDNodeFlags Flags, bool Reciprocal) {
+ EVT VT = Arg.getValueType();
+ SDLoc DL(Arg);
+ SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
+ SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
+
+ // This routine must enter the loop below to work correctly
+ // when (Reciprocal == false).
+ assert(Iterations > 0 && "Expected at least one refinement iteration");
+
+ // Newton iterations for reciprocal square root:
+ // E = (E * -0.5) * ((A * E) * E + -3.0)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
+ SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
+ SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
+
+ // When calculating a square root at the last iteration build:
+ // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
+ // (notice a common subexpression)
+ SDValue LHS;
+ if (Reciprocal || (i + 1) < Iterations) {
+ // RSQRT: LHS = (E * -0.5)
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
+ } else {
+ // SQRT: LHS = (A * E) * -0.5
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
+ }
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
+ }
+
+ return Est;
+}
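+
+// Scalar shape of the final iteration above when a square root (rather than
+// a reciprocal square root) is requested, reusing the common subexpression
+// AE = A * E (illustrative only):
+//   double ae = a * est;
+//   double sqrt_a = (ae * -0.5) * (ae * est + -3.0);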
+
+/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
+/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
+/// Op can be zero.
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
+ bool Reciprocal) {
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
+
+ bool UseOneConstNR = false;
+ if (SDValue Est =
+ TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
+ Reciprocal)) {
+ AddToWorklist(Est.getNode());
+
+ if (Iterations) {
+ Est = UseOneConstNR
+ ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+
+ if (!Reciprocal) {
+ // The estimate is now completely wrong if the input was exactly 0.0 or
+ // possibly a denormal. Force the answer to 0.0 for those cases.
+ SDLoc DL(Op);
+ EVT CCVT = getSetCCResultType(VT);
+ ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+ const Function &F = DAG.getMachineFunction().getFunction();
+ Attribute Denorms = F.getFnAttribute("denormal-fp-math");
+ if (Denorms.getValueAsString().equals("ieee")) {
+ // fabs(X) < SmallestNormal ? 0.0 : Est
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+ SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+ SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+ Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
+ } else {
+ // X == 0.0 ? 0.0 : Est
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+ Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
+ }
+ }
+ }
+ return Est;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
+ return buildSqrtEstimateImpl(Op, Flags, true);
+}
+
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
+ return buildSqrtEstimateImpl(Op, Flags, false);
+}
+
+/// Return true if there is any possibility that the two addresses overlap.
+bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
+
+ struct MemUseCharacteristics {
+ bool IsVolatile;
+ bool IsAtomic;
+ SDValue BasePtr;
+ int64_t Offset;
+ Optional<int64_t> NumBytes;
+ MachineMemOperand *MMO;
+ };
+
+ auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
+ if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
+ int64_t Offset = 0;
+ if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
+ Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
+ ? C->getSExtValue()
+ : (LSN->getAddressingMode() == ISD::PRE_DEC)
+ ? -1 * C->getSExtValue()
+ : 0;
+ return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
+ Offset /*base offset*/,
+ Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
+ LSN->getMemOperand()};
+ }
+ if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
+ return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
+ (LN->hasOffset()) ? LN->getOffset() : 0,
+ (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
+ : Optional<int64_t>(),
+ (MachineMemOperand *)nullptr};
+ // Default.
+ return {false /*isVolatile*/, /*isAtomic*/ false, SDValue(),
+ (int64_t)0 /*offset*/,
+ Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
+ };
+
+ MemUseCharacteristics MUC0 = getCharacteristics(Op0),
+ MUC1 = getCharacteristics(Op1);
+
+ // If they are to the same address, then they must be aliases.
+ if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
+ MUC0.Offset == MUC1.Offset)
+ return true;
+
+ // If they are both volatile then they cannot be reordered.
+ if (MUC0.IsVolatile && MUC1.IsVolatile)
+ return true;
+
+ // Be conservative about atomics for the moment
+ // TODO: This is way overconservative for unordered atomics (see D66309)
+ if (MUC0.IsAtomic && MUC1.IsAtomic)
+ return true;
+
+ if (MUC0.MMO && MUC1.MMO) {
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+ }
+
+ // Try to prove that there is aliasing, or that there is no aliasing. Either
+ // way, we can return now. If nothing can be proved, proceed with more tests.
+ bool IsAlias;
+ if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
+ DAG, IsAlias))
+ return IsAlias;
+
+ // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
+ // either are not known.
+ if (!MUC0.MMO || !MUC1.MMO)
+ return true;
+
+ // If one operation reads from invariant memory, and the other may store, they
+ // cannot alias. These should really be checking the equivalent of mayWrite,
+ // but it only matters for memory nodes other than load/store.
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+
+ // If we know that SrcValue1 and SrcValue2 have relatively large
+ // alignment compared to the size and offset of the access, we may be able
+ // to prove they do not alias. This check is conservative for now to catch
+ // cases created by splitting vector types.
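+ // For example, two 4-byte accesses whose base objects are both 8-byte
+ // aligned, at offsets 0 and 4 from their bases, occupy [0,4) and [4,8)
+ // within any 8-byte window and therefore cannot overlap.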
+ int64_t SrcValOffset0 = MUC0.MMO->getOffset();
+ int64_t SrcValOffset1 = MUC1.MMO->getOffset();
+ unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
+ unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
+ if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
+ MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
+ *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
+ int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
+ int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
+
+ // There is no overlap between these relatively aligned accesses of
+ // similar size. Return no alias.
+ if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
+ (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
+ return false;
+ }
+
+ bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
+ ? CombinerGlobalAA
+ : DAG.getSubtarget().useAA();
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
+
+ if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
+ int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
+ AliasResult AAResult = AA->alias(
+ MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
+ if (AAResult == NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVectorImpl<SDValue> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
+
+ // Get alias information for node.
+ // TODO: relax aliasing for unordered atomics (see D66309)
+ const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+ unsigned Depth = 0;
+
+ // Attempt to improve chain by a single step
+ std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
+ switch (C.getOpcode()) {
+ case ISD::EntryToken:
+ // No need to mark EntryToken.
+ C = SDValue();
+ return true;
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for C.
+ // TODO: Relax aliasing for unordered atomics (see D66309)
+ bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
+ cast<LSBaseSDNode>(C.getNode())->isSimple();
+ if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+ // Look further up the chain.
+ C = C.getOperand(0);
+ return true;
+ }
+ // Alias, so stop here.
+ return false;
+ }
+
+ case ISD::CopyFromReg:
+ // Always forward past CopyFromReg.
+ C = C.getOperand(0);
+ return true;
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ // We can forward past any lifetime start/end that can be proven not to
+ // alias the memory access.
+ if (!isAlias(N, C.getNode())) {
+ // Look further up the chain.
+ C = C.getOperand(0);
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+ };
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.pop_back_val();
+
+ // Don't bother if we've seen Chain before.
+ if (!Visited.insert(Chain.getNode()).second)
+ continue;
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we reach the depth limit.
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
+ // chain we found before we hit a tokenfactor rather than the original
+ // chain.
+ if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ return;
+ }
+
+ if (Chain.getOpcode() == ISD::TokenFactor) {
+ // We have to check each of the operands of the token factor for "small"
+ // token factors, so we queue them up. Adding the operands to the queue
+ // (stack) in reverse order maintains the original order and increases the
+ // likelihood that getNode will find a matching token factor (CSE).
+ if (Chain.getNumOperands() > 16) {
+ Aliases.push_back(Chain);
+ continue;
+ }
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ ++Depth;
+ continue;
+ }
+ // Everything else
+ if (ImproveChain(Chain)) {
+ // Updated chain found; consider the new chain if one exists.
+ if (Chain.getNode())
+ Chains.push_back(Chain);
+ ++Depth;
+ continue;
+ }
+ // No improved chain is possible, so treat this chain as an alias.
+ Aliases.push_back(Chain);
+ }
+}
+
+/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
+/// (aliasing node.)
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ if (OptLevel == CodeGenOpt::None)
+ return OldChain;
+
+ // Ops for replacing token factor.
+ SmallVector<SDValue, 8> Aliases;
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+ // If no operands then chain to entry token.
+ if (Aliases.size() == 0)
+ return DAG.getEntryNode();
+
+ // If a single operand then chain to it. We don't need to revisit it.
+ if (Aliases.size() == 1)
+ return Aliases[0];
+
+ // Construct a custom tailored token factor.
+ return DAG.getTokenFactor(SDLoc(N), Aliases);
+}
+
+namespace {
+// TODO: Replace with std::monostate when we move to C++17.
+struct UnitT { } Unit;
+bool operator==(const UnitT &, const UnitT &) { return true; }
+bool operator!=(const UnitT &, const UnitT &) { return false; }
+} // namespace
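+
+// Note that because all UnitT values compare equal, the IntervalMap used
+// below coalesces adjacent intervals, so runs of contiguous store ranges
+// collapse into a single interval.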
+
+// This function tries to collect a bunch of potentially interesting
+// nodes to improve the chains of, all at once. This might seem
+// redundant, as this function gets called when visiting every store
+// node, so why not let the work be done on each store as it's visited?
+//
+// I believe this is mainly important because MergeConsecutiveStores
+// is unable to deal with merging stores of different sizes, so unless
+// we improve the chains of all the potential candidates up-front
+// before running MergeConsecutiveStores, it might only see some of
+// the nodes that will eventually be candidates, and then not be able
+// to go from a partially-merged state to the desired final
+// fully-merged state.
+
+bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ StoreSDNode *STChain = St;
+ // Intervals records which offsets from BaseIndex have been covered. In
+ // the common case, every store writes to the range immediately adjacent to
+ // the previous one and is thus merged with the previous interval at
+ // insertion time.
+
+ using IMap =
+ llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
+ IMap::Allocator A;
+ IMap Intervals(A);
+
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+
+ // We must have a base and an offset.
+ if (!BasePtr.getBase().getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.getBase().isUndef())
+ return false;
+
+ // Add ST's interval.
+ Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
+
+ while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (!SDValue(Chain, 0)->hasOneUse())
+ break;
+ // TODO: Relax for unordered atomics (see D66309)
+ if (!Chain->isSimple() || Chain->isIndexed())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
+ // Check that the base pointer is the same as the original one.
+ int64_t Offset;
+ if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
+ break;
+ int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
+ // Make sure we don't overlap with other intervals by checking the ones to
+ // the left or right before inserting.
+ auto I = Intervals.find(Offset);
+ // If there's a next interval, we should end before it.
+ if (I != Intervals.end() && I.start() < (Offset + Length))
+ break;
+ // If there's a previous interval, we should start after it.
+ if (I != Intervals.begin() && (--I).stop() <= Offset)
+ break;
+ Intervals.insert(Offset, Offset + Length, Unit);
+
+ ChainedStores.push_back(Chain);
+ STChain = Chain;
+ }
+
+ // If we didn't find a chained store, exit.
+ if (ChainedStores.size() == 0)
+ return false;
+
+ // Improve all chained stores (St and ChainedStores members) starting from
+ // where the store chain ended and return single TokenFactor.
+ SDValue NewChain = STChain->getChain();
+ SmallVector<SDValue, 8> TFOps;
+ for (unsigned I = ChainedStores.size(); I;) {
+ StoreSDNode *S = ChainedStores[--I];
+ SDValue BetterChain = FindBetterChain(S, NewChain);
+ S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
+ S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
+ TFOps.push_back(SDValue(S, 0));
+ ChainedStores[I] = S;
+ }
+
+ // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
+ SDValue BetterChain = FindBetterChain(St, NewChain);
+ SDValue NewST;
+ if (St->isTruncatingStore())
+ NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand());
+ else
+ NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemOperand());
+
+ TFOps.push_back(NewST);
+
+ // If we improved every element of TFOps, then we've lost the dependence on
+ // NewChain to successors of St and we need to add it back to TFOps. Do so at
+ // the beginning to keep relative order consistent with FindBetterChains.
+ auto hasImprovedChain = [&](SDValue ST) -> bool {
+ return ST->getOperand(0) != NewChain;
+ };
+ bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
+ if (AddNewChain)
+ TFOps.insert(TFOps.begin(), NewChain);
+
+ SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
+ CombineTo(St, TF);
+
+ // Add TF and its operands to the worklist.
+ AddToWorklist(TF.getNode());
+ for (const SDValue &Op : TF->ops())
+ AddToWorklist(Op.getNode());
+ AddToWorklist(STChain);
+ return true;
+}
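+
+// Illustrative effect of the rewrite above on a simplified chain of three
+// disjoint stores (assumed shapes):
+//   before: St -> St1 -> St2 -> X         (stores serialized by the chain)
+//   after:  TokenFactor(St', St1', St2')  (each store chained directly to X)
+// so the stores no longer order against one another and the scheduler may
+// interleave them freely.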
+
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+
+ // We must have a base and an offset.
+ if (!BasePtr.getBase().getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.getBase().isUndef())
+ return false;
+
+ // Directly improve a chain of disjoint stores starting at St.
+ if (parallelizeChainedStores(St))
+ return true;
+
+ // Improve St's chain.
+ SDValue BetterChain = FindBetterChain(St, St->getChain());
+ if (St->getChain() != BetterChain) {
+ replaceStoreChain(St, BetterChain);
+ return true;
+ }
+ return false;
+}
+
+/// This is the entry point for the file.
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
+ CodeGenOpt::Level OptLevel) {
+ // This is the main entry point to this class.
+ DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 0000000000000..6d7260d7aee5c
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,2474 @@
+//===- FastISel.cpp - Implementation of the FastISel class ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "isel"
+
+// FIXME: Remove this after the feature has proven reliable.
+static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values",
+ cl::init(true), cl::Hidden,
+ cl::desc("Sink local values in FastISel"));
+
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
+
+/// Set the current block to which generated machine instructions will be
+/// appended.
+void FastISel::startNewBlock() {
+ assert(LocalValueMap.empty() &&
+ "local values should be cleared after finishing a BB");
+
+ // Instructions are appended to FuncInfo.MBB. If the basic block already
+ // contains labels or copies, use the last instruction as the last local
+ // value.
+ EmitStartPt = nullptr;
+ if (!FuncInfo.MBB->empty())
+ EmitStartPt = &FuncInfo.MBB->back();
+ LastLocalValue = EmitStartPt;
+}
+
+/// Flush the local CSE map and sink anything we can.
+void FastISel::finishBasicBlock() { flushLocalValueMap(); }
+
+bool FastISel::lowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ // Fallback to SDISel argument lowering code to deal with sret pointer
+ // parameter.
+ return false;
+
+ if (!fastLowerArguments())
+ return false;
+
+ // Enter arguments into ValueMap for uses in non-entry BBs.
+ for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
+ E = FuncInfo.Fn->arg_end();
+ I != E; ++I) {
+ DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I);
+ assert(VI != LocalValueMap.end() && "Missed an argument?");
+ FuncInfo.ValueMap[&*I] = VI->second;
+ }
+ return true;
+}
+
+/// Return the defined register if this instruction defines exactly one
+/// virtual register and uses no other virtual registers. Otherwise return 0.
+static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
+ unsigned RegDef = 0;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ if (RegDef)
+ return 0;
+ RegDef = MO.getReg();
+ } else if (Register::isVirtualRegister(MO.getReg())) {
+ // This is another use of a vreg. Don't try to sink it.
+ return 0;
+ }
+ }
+ return RegDef;
+}
+
+void FastISel::flushLocalValueMap() {
+ // Try to sink local values down to their first use so that we can give them a
+ // better debug location. This has the side effect of shrinking local value
+ // live ranges, which helps out fast regalloc.
+ if (SinkLocalValues && LastLocalValue != EmitStartPt) {
+ // Sink local value materialization instructions between EmitStartPt and
+ // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to
+ // avoid inserting into the range that we're iterating over.
+ MachineBasicBlock::reverse_iterator RE =
+ EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
+ : FuncInfo.MBB->rend();
+ MachineBasicBlock::reverse_iterator RI(LastLocalValue);
+
+ InstOrderMap OrderMap;
+ for (; RI != RE;) {
+ MachineInstr &LocalMI = *RI;
+ ++RI;
+ bool Store = true;
+ if (!LocalMI.isSafeToMove(nullptr, Store))
+ continue;
+ unsigned DefReg = findSinkableLocalRegDef(LocalMI);
+ if (DefReg == 0)
+ continue;
+
+ sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap);
+ }
+ }
+
+ LocalValueMap.clear();
+ LastLocalValue = EmitStartPt;
+ recomputeInsertPt();
+ SavedInsertPt = FuncInfo.InsertPt;
+ LastFlushPoint = FuncInfo.InsertPt;
+}
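+
+// For example (illustrative, x86-flavored pseudo-MIR): a block that
+// materializes a constant in the local value area
+//   %c = MOV32ri 42      ; local value at top of block
+//   CALL @f
+//   %s = ADD32rr %x, %c  ; first use of %c
+// becomes, after sinking:
+//   CALL @f
+//   %c = MOV32ri 42      ; sunk to its first use, taking its DebugLoc
+//   %s = ADD32rr %x, %c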
+
+static bool isRegUsedByPhiNodes(unsigned DefReg,
+ FunctionLoweringInfo &FuncInfo) {
+ for (auto &P : FuncInfo.PHINodesToUpdate)
+ if (P.second == DefReg)
+ return true;
+ return false;
+}
+
+/// Build a map of instruction orders. Record the first terminator and its
+/// order. Consider EH_LABEL instructions to be terminators as well, since local
+/// values for phis after invokes must be materialized before the call.
+void FastISel::InstOrderMap::initialize(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) {
+ unsigned Order = 0;
+ for (MachineInstr &I : *MBB) {
+ if (!FirstTerminator &&
+ (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) {
+ FirstTerminator = &I;
+ FirstTerminatorOrder = Order;
+ }
+ Orders[&I] = Order++;
+
+ // We don't need to order instructions past the last flush point.
+ if (I.getIterator() == LastFlushPoint)
+ break;
+ }
+}
+
+void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
+ unsigned DefReg,
+ InstOrderMap &OrderMap) {
+ // If this register is used by a register fixup, MRI will not contain all
+ // the uses until after register fixups, so don't attempt to sink or DCE
+ // this instruction. Register fixups typically come from no-op cast
+ // instructions, which replace the cast instruction vreg with the local
+ // value vreg.
+ if (FuncInfo.RegsWithFixups.count(DefReg))
+ return;
+
+ // We can DCE this instruction if there are no uses and it wasn't
+ // materialized for a successor PHI node.
+ bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
+ if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
+ if (EmitStartPt == &LocalMI)
+ EmitStartPt = EmitStartPt->getPrevNode();
+ LLVM_DEBUG(dbgs() << "removing dead local value materialization "
+ << LocalMI);
+ OrderMap.Orders.erase(&LocalMI);
+ LocalMI.eraseFromParent();
+ return;
+ }
+
+ // Number the instructions if we haven't yet so we can efficiently find the
+ // earliest use.
+ if (OrderMap.Orders.empty())
+ OrderMap.initialize(FuncInfo.MBB, LastFlushPoint);
+
+ // Find the first user in the BB.
+ MachineInstr *FirstUser = nullptr;
+ unsigned FirstOrder = std::numeric_limits<unsigned>::max();
+ for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
+ auto I = OrderMap.Orders.find(&UseInst);
+ assert(I != OrderMap.Orders.end() &&
+ "local value used by instruction outside local region");
+ unsigned UseOrder = I->second;
+ if (UseOrder < FirstOrder) {
+ FirstOrder = UseOrder;
+ FirstUser = &UseInst;
+ }
+ }
+
+ // The insertion point will be the first terminator or the first user,
+ // whichever came first. If there was no terminator, this must be a
+ // fallthrough block and the insertion point is the end of the block.
+ MachineBasicBlock::instr_iterator SinkPos;
+ if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
+ FirstOrder = OrderMap.FirstTerminatorOrder;
+ SinkPos = OrderMap.FirstTerminator->getIterator();
+ } else if (FirstUser) {
+ SinkPos = FirstUser->getIterator();
+ } else {
+ assert(UsedByPHI && "must have users if not used by a phi");
+ SinkPos = FuncInfo.MBB->instr_end();
+ }
+
+ // Collect all DBG_VALUEs before the new insertion position so that we can
+ // sink them.
+ SmallVector<MachineInstr *, 1> DbgValues;
+ for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
+ if (!DbgVal.isDebugValue())
+ continue;
+ unsigned UseOrder = OrderMap.Orders[&DbgVal];
+ if (UseOrder < FirstOrder)
+ DbgValues.push_back(&DbgVal);
+ }
+
+ // Sink LocalMI before SinkPos and assign it the same DebugLoc.
+ LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
+ FuncInfo.MBB->remove(&LocalMI);
+ FuncInfo.MBB->insert(SinkPos, &LocalMI);
+ if (SinkPos != FuncInfo.MBB->end())
+ LocalMI.setDebugLoc(SinkPos->getDebugLoc());
+
+ // Sink any debug values that we've collected.
+ for (MachineInstr *DI : DbgValues) {
+ FuncInfo.MBB->remove(DI);
+ FuncInfo.MBB->insert(SinkPos, DI);
+ }
+}
+
+bool FastISel::hasTrivialKill(const Value *V) {
+ // Don't consider constants or arguments to have trivial kills.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // No-op casts are trivially coalesced by fast-isel.
+ if (const auto *Cast = dyn_cast<CastInst>(I))
+ if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0)))
+ return false;
+
+ // Even if the value has only one use in the LLVM IR, it is possible that
+ // FastISel might fold the use into another instruction and then there is
+ // more than one use at the Machine Instruction level.
+ unsigned Reg = lookUpRegForValue(V);
+ if (Reg && !MRI.use_empty(Reg))
+ return false;
+
+ // GEPs with all zero indices are trivially coalesced by fast-isel.
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
+ return false;
+
+ // Only instructions with a single use in the same basic block are considered
+ // to have trivial kills.
+ return I->hasOneUse() &&
+ !(I->getOpcode() == Instruction::BitCast ||
+ I->getOpcode() == Instruction::PtrToInt ||
+ I->getOpcode() == Instruction::IntToPtr) &&
+ cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
+}
+
+unsigned FastISel::getRegForValue(const Value *V) {
+ EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it.
+ unsigned Reg = lookUpRegForValue(V);
+ if (Reg)
+ return Reg;
+
+ // In bottom-up mode, just create the virtual register which will be used
+ // to hold the value. It will be materialized later.
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
+
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
+}
+
+unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+ if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V))
+ Reg = fastMaterializeAlloca(cast<AllocaInst>(V));
+ else if (isa<ConstantPointerNull>(V))
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg = getRegForValue(
+ Constant::getNullValue(DL.getIntPtrType(V->getContext())));
+ else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
+ if (CF->isNullValue())
+ Reg = fastMaterializeFloatZero(CF);
+ else
+ // Try to emit the constant directly.
+ Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF);
+
+ if (!Reg) {
+ // Try to emit the constant by using an integer constant with a cast.
+ const APFloat &Flt = CF->getValueAPF();
+ EVT IntVT = TLI.getPointerTy(DL);
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ APSInt SIntVal(IntBitWidth, /*isUnsigned=*/false);
+ bool isExact;
+ (void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ unsigned IntegerReg =
+ getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
+ if (IntegerReg != 0)
+ Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
+ /*Kill=*/false);
+ }
+ }
+ } else if (const auto *Op = dyn_cast<Operator>(V)) {
+ if (!selectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !fastSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
+ return Reg;
+}
+
+/// Helper for getRegForValue. This function is called when the value isn't
+/// already available in a register and must be materialized with new
+/// instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+ // Give the target-specific code a try first.
+ if (isa<Constant>(V))
+ Reg = fastMaterializeConstant(cast<Constant>(V));
+
+ // If target-specific code couldn't or didn't want to handle the value, then
+ // give target-independent code a try.
+ if (!Reg)
+ Reg = materializeConstant(V, VT);
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg) {
+ LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(const Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+ return LocalValueMap[V];
+}
+
+void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return;
+ }
+
+ unsigned &AssignedReg = FuncInfo.ValueMap[I];
+ if (AssignedReg == 0)
+ // Use the new register.
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+ for (unsigned i = 0; i < NumRegs; i++) {
+ FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
+ FuncInfo.RegsWithFixups.insert(Reg + i);
+ }
+
+ AssignedReg = Reg;
+ }
+}
+
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy(DL);
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN,
+ IdxNIsKill);
+ IdxNIsKill = true;
+ } else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN =
+ fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+void FastISel::recomputeInsertPt() {
+ if (getLastLocalValue()) {
+ FuncInfo.InsertPt = getLastLocalValue();
+ FuncInfo.MBB = FuncInfo.InsertPt->getParent();
+ ++FuncInfo.InsertPt;
+ } else
+ FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+
+ // Now skip past any EH_LABELs, which must remain at the beginning.
+ while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
+ FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
+ ++FuncInfo.InsertPt;
+}
+
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
+ "Invalid iterator!");
+ while (I != E) {
+ if (LastFlushPoint == I)
+ LastFlushPoint = E;
+ if (SavedInsertPt == I)
+ SavedInsertPt = E;
+ if (EmitStartPt == I)
+ EmitStartPt = E.isValid() ? &*E : nullptr;
+ if (LastLocalValue == I)
+ LastLocalValue = E.isValid() ? &*E : nullptr;
+
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
+FastISel::SavePoint FastISel::enterLocalValueArea() {
+ MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+ DebugLoc OldDL = DbgLoc;
+ recomputeInsertPt();
+ DbgLoc = DebugLoc();
+ SavePoint SP = {OldInsertPt, OldDL};
+ return SP;
+}
+
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
+ if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+ LastLocalValue = &*std::prev(FuncInfo.InsertPt);
+
+ // Restore the previous insert position.
+ FuncInfo.InsertPt = OldInsertPt.InsertPt;
+ DbgLoc = OldInsertPt.DL;
+}
+
+bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
+ EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+ else
+ return false;
+ }
+
+ // Check if the first operand is a constant, and handle it as "ri". At -O0,
+ // we don't have anything that canonicalizes operand order.
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (!Op1)
+ return false;
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg =
+ fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill,
+ CI->getZExtValue(), VT.getSimpleVT());
+ if (!ResultReg)
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t Imm = CI->getSExtValue();
+
+ // Transform "sdiv exact X, 8" -> "sra X, 3".
+ if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+ cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) {
+ Imm = Log2_64(Imm);
+ ISDOpcode = ISD::SRA;
+ }
+
+ // Transform "urem x, pow2" -> "and x, pow2-1".
+ if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+ isPowerOf2_64(Imm)) {
+ --Imm;
+ ISDOpcode = ISD::AND;
+ }
+
+ unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Op0IsKill, Imm, VT.getSimpleVT());
+ if (!ResultReg)
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ // Now we have both operands in registers. Emit the instruction.
+ unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill);
+ if (!ResultReg)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::selectGetElementPtr(const User *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ // FIXME: What's a good SWAG number for MaxOffs?
+ uint64_t MaxOffs = 2048;
+ MVT VT = TLI.getPointerTy(DL);
+ for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
+ uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
+ if (TotalOffs >= MaxOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ }
+ } else {
+ Type *Ty = GTI.getIndexedType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
+ // N = N + Offset
+ uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
+ TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
+ if (TotalOffs >= MaxOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ IdxNIsKill = true;
+ }
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+ if (TotalOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, N);
+ return true;
+}
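+
+// For example (illustrative IR), for
+//   %a = getelementptr { i32, [4 x i32] }, { i32, [4 x i32] }* %p,
+//        i64 0, i32 1, i64 %i
+// the constant field offset (4 bytes) accumulates into TotalOffs, while the
+// variable index first flushes TotalOffs (N = N + 4) and then emits
+// IdxN = %i * 4 and N = N + IdxN.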
+
+bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
+ const CallInst *CI, unsigned StartIdx) {
+ for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
+ Value *Val = CI->getArgOperand(i);
+ // Check for constants and encode them with a StackMaps::ConstantOp prefix.
+ if (const auto *C = dyn_cast<ConstantInt>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
+ } else if (isa<ConstantPointerNull>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(0));
+ } else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
+ // Values coming from a stack location also require a special encoding,
+ // but that is added later on by the target specific frame index
+ // elimination implementation.
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ Ops.push_back(MachineOperand::CreateFI(SI->second));
+ else
+ return false;
+ } else {
+ unsigned Reg = getRegForValue(Val);
+ if (!Reg)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+ }
+ }
+ return true;
+}
+
+bool FastISel::selectStackmap(const CallInst *I) {
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+ // [live variables...])
+ assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
+ "Stackmap cannot return a value.");
+
+ // The stackmap intrinsic only records the live variables (the arguments
+ // passed to it) and emits NOPs (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target-specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // CALLSEQ_START(0, 0...)
+ // STACKMAP(id, nbytes, ...)
+ // CALLSEQ_END(0, 0)
+ //
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add the <id> and <numBytes> constants.
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+ "Expected a constant integer.");
+ const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+ "Expected a constant integer.");
+ const auto *NumBytes =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+ // Push live variables for the stack map (skipping the first two arguments
+ // <id> and <numBytes>).
+ if (!addStackMapLiveVars(Ops, I, 2))
+ return false;
+
+ // We are not adding any register mask info here, because the stackmap doesn't
+ // clobber anything.
+
+ // Add scratch registers as implicit def and early clobber.
+ CallingConv::ID CC = I->getCallingConv();
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ Ops.push_back(MachineOperand::CreateReg(
+ ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+ /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ auto Builder =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown));
+ const MCInstrDesc &MCID = Builder.getInstr()->getDesc();
+ for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I)
+ Builder.addImm(0);
+
+ // Issue STACKMAP.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::STACKMAP));
+ for (auto const &MO : Ops)
+ MIB.add(MO);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
+ .addImm(0)
+ .addImm(0);
+
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo().setHasStackMap();
+
+ return true;
+}
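+
+// For example (illustrative IR),
+//   call void @llvm.experimental.stackmap(i64 42, i32 8, i32 %live)
+// records ID 42 and the location of %live in the stackmap section, and
+// reserves an 8-byte shadow at the call site, padded with nops as needed.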
+
+/// Lower an argument list according to the target calling convention.
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
+ unsigned NumArgs, const Value *Callee,
+ bool ForceRetVoidTy, CallLoweringInfo &CLI) {
+ ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ ImmutableCallSite CS(CI);
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
+ Value *V = CI->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ ArgListEntry Entry;
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, ArgI);
+ Args.push_back(Entry);
+ }
+
+ Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext())
+ : CI->getType();
+ CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs);
+
+ return lowerCallTo(CLI);
+}
+
+FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee(
+ const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy,
+ StringRef Target, ArgListTy &&ArgsList, unsigned FixedArgs) {
+ SmallString<32> MangledName;
+ Mangler::getNameWithPrefix(MangledName, Target, DL);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
+ return setCallee(CC, ResultTy, Sym, std::move(ArgsList), FixedArgs);
+}
+
+bool FastISel::selectPatchpoint(const CallInst *I) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
+ CallingConv::ID CC = I->getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !I->getType()->isVoidTy();
+ Value *Callee =
+ I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts();
+
+ // Get the real number of arguments participating in the call <numArgs>
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) &&
+ "Expected a constant integer.");
+ const auto *NumArgsVal =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos));
+ unsigned NumArgs = NumArgsVal->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ // This includes all meta-operands up to but not including CC.
+ unsigned NumMetaOpers = PatchPointOpers::CCPos;
+ assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
+ CallLoweringInfo CLI;
+ CLI.setIsPatchPoint();
+ if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI))
+ return false;
+
+ assert(CLI.Call && "No call instruction specified.");
+
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add an explicit result reg if we use the anyreg calling convention.
+ if (IsAnyRegCC && HasDef) {
+ assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
+ CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
+ CLI.NumResultRegs = 1;
+ Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true));
+ }
+
+ // Add the <id> and <numBytes> constants.
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+ "Expected a constant integer.");
+ const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+ "Expected a constant integer.");
+ const auto *NumBytes =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+ // Add the call target.
+ if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) {
+ uint64_t CalleeConstAddr =
+ cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
+ } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) {
+ if (C->getOpcode() == Instruction::IntToPtr) {
+ uint64_t CalleeConstAddr =
+ cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
+ } else
+ llvm_unreachable("Unsupported ConstantExpr.");
+ } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) {
+ Ops.push_back(MachineOperand::CreateGA(GV, 0));
+ } else if (isa<ConstantPointerNull>(Callee))
+ Ops.push_back(MachineOperand::CreateImm(0));
+ else
+ llvm_unreachable("Unsupported callee address.");
+
+ // Adjust <numArgs> to account for any arguments that have been passed on
+ // the stack instead.
+ unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size();
+ Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs));
+
+ // Add the calling convention
+ Ops.push_back(MachineOperand::CreateImm((unsigned)CC));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (IsAnyRegCC) {
+ for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) {
+ unsigned Reg = getRegForValue(I->getArgOperand(i));
+ if (!Reg)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+ }
+ }
+
+ // Push the arguments from the call instruction.
+ for (auto Reg : CLI.OutRegs)
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+
+ // Push live variables for the stack map.
+ if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
+ return false;
+
+ // Push the register mask info.
+ Ops.push_back(MachineOperand::CreateRegMask(
+ TRI.getCallPreservedMask(*FuncInfo.MF, CC)));
+
+ // Add scratch registers as implicit def and early clobber.
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ Ops.push_back(MachineOperand::CreateReg(
+ ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+ /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
+
+ // Add implicit defs (return values).
+ for (auto Reg : CLI.InRegs)
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true,
+ /*isImp=*/true));
+
+ // Insert the patchpoint instruction before the call generated by the target.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
+ TII.get(TargetOpcode::PATCHPOINT));
+
+ for (auto &MO : Ops)
+ MIB.add(MO);
+
+ MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI);
+
+ // Delete the original call instruction.
+ CLI.Call->eraseFromParent();
+
+ // Inform the Frame Information that we have a patchpoint in this function.
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint();
+
+ if (CLI.NumResultRegs)
+ updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs);
+ return true;
+}
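+
+// For example (illustrative IR),
+//   %r = call i64 (i64, i32, i8*, i32, ...)
+//        @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %target,
+//                                          i32 2, i64 %a, i64 %b)
+// lowers to a PATCHPOINT pseudo that calls %target(%a, %b) and reserves 15
+// bytes for later patching.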
+
+bool FastISel::selectXRayCustomEvent(const CallInst *I) {
+ const auto &Triple = TM.getTargetTriple();
+ if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ return true; // don't do anything to this instruction.
+ SmallVector<MachineOperand, 8> Ops;
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+ /*isDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+ /*isDef=*/false));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
+ for (auto &MO : Ops)
+ MIB.add(MO);
+
+ // Insert the Patchable Event Call instruction, which gets lowered properly.
+ return true;
+}
+
+bool FastISel::selectXRayTypedEvent(const CallInst *I) {
+ const auto &Triple = TM.getTargetTriple();
+ if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ return true; // don't do anything to this instruction.
+ SmallVector<MachineOperand, 8> Ops;
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+ /*isDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+ /*isDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
+ /*isDef=*/false));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
+ for (auto &MO : Ops)
+ MIB.add(MO);
+
+ // The PATCHABLE_TYPED_EVENT_CALL instruction inserted above gets lowered
+ // later.
+ return true;
+}
+
+/// Returns an AttributeList representing the attributes applied to the return
+/// value of the given call.
+static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
+ SmallVector<Attribute::AttrKind, 2> Attrs;
+ if (CLI.RetSExt)
+ Attrs.push_back(Attribute::SExt);
+ if (CLI.RetZExt)
+ Attrs.push_back(Attribute::ZExt);
+ if (CLI.IsInReg)
+ Attrs.push_back(Attribute::InReg);
+
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
+ unsigned NumArgs) {
+ MCContext &Ctx = MF->getContext();
+ SmallString<32> MangledName;
+ Mangler::getNameWithPrefix(MangledName, SymName, DL);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
+ return lowerCallTo(CI, Sym, NumArgs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
+ unsigned NumArgs) {
+ ImmutableCallSite CS(CI);
+
+ FunctionType *FTy = CS.getFunctionType();
+ Type *RetTy = CS.getType();
+
+ ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) {
+ Value *V = CI->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ ArgListEntry Entry;
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, ArgI);
+ Args.push_back(Entry);
+ }
+ TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args);
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs);
+
+ return lowerCallTo(CLI);
+}
+
+bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
+ // Handle the incoming return values from the call.
+ CLI.clearIns();
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys);
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);
+
+ bool CanLowerReturn = TLI.CanLowerReturn(
+ CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
+ // FIXME: sret demotion isn't supported yet - bail out.
+ if (!CanLowerReturn)
+ return false;
+
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+
+ // Handle all of the outgoing arguments.
+ CLI.clearOuts();
+ for (auto &Arg : CLI.getArgs()) {
+ Type *FinalType = Arg.Ty;
+ if (Arg.IsByVal)
+ FinalType = cast<PointerType>(Arg.Ty)->getElementType();
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ FinalType, CLI.CallConv, CLI.IsVarArg);
+
+ ISD::ArgFlagsTy Flags;
+ if (Arg.IsZExt)
+ Flags.setZExt();
+ if (Arg.IsSExt)
+ Flags.setSExt();
+ if (Arg.IsInReg)
+ Flags.setInReg();
+ if (Arg.IsSRet)
+ Flags.setSRet();
+ if (Arg.IsSwiftSelf)
+ Flags.setSwiftSelf();
+ if (Arg.IsSwiftError)
+ Flags.setSwiftError();
+ if (Arg.IsByVal)
+ Flags.setByVal();
+ if (Arg.IsInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling in
+ // the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Arg.IsByVal || Arg.IsInAlloca) {
+ PointerType *Ty = cast<PointerType>(Arg.Ty);
+ Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize =
+ DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);
+
+ // For ByVal, the alignment should come from the frontend. The backend
+ // will guess if this info is not there, but there are cases it cannot
+ // get right.
+ unsigned FrameAlign = Arg.Alignment;
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(Align(FrameAlign));
+ }
+ if (Arg.IsNest)
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+ Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty)));
+
+ CLI.OutVals.push_back(Arg.Val);
+ CLI.OutFlags.push_back(Flags);
+ }
+
+ if (!fastLowerCall(CLI))
+ return false;
+
+ // Set all unused physreg defs as dead.
+ assert(CLI.Call && "No call instruction specified.");
+ CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI);
+
+ if (CLI.NumResultRegs && CLI.CS)
+ updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
+
+ // Set labels for heapallocsite call.
+ if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) {
+ const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
+ MF->addCodeViewHeapAllocSite(CLI.Call, MD);
+ }
+
+ return true;
+}
+
+bool FastISel::lowerCall(const CallInst *CI) {
+ ImmutableCallSite CS(CI);
+
+ FunctionType *FuncTy = CS.getFunctionType();
+ Type *RetTy = CS.getType();
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(&CS, i - CS.arg_begin());
+ Args.push_back(Entry);
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within fastLowerCall.
+ bool IsTailCall = CI->isTailCall();
+ if (IsTailCall && !isInTailCallPosition(CS, TM))
+ IsTailCall = false;
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS)
+ .setTailCall(IsTailCall);
+
+ return lowerCallTo(CLI);
+}
+
+bool FastISel::selectCall(const User *I) {
+ const CallInst *Call = cast<CallInst>(I);
+
+ // Handle simple inline asms.
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
+ // If the inline asm has side effects, then make sure that no local value
+ // lives across by flushing the local value map.
+ if (IA->hasSideEffects())
+ flushLocalValueMap();
+
+ // Don't attempt to handle constraints.
+ if (!IA->getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::INLINEASM))
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
+ return true;
+ }
+
+ // Handle intrinsic function calls.
+ if (const auto *II = dyn_cast<IntrinsicInst>(Call))
+ return selectIntrinsicCall(II);
+
+ // Usually it does not make sense to initialize a value, make an unrelated
+ // function call, and then use the value, because the value tends to be
+ // spilled to the stack. So we move the pointer to the last local value to
+ // the beginning of the block, so that all the values which have already
+ // been materialized appear after the call. It also makes sense to skip
+ // intrinsics, since they tend to be inlined.
+ flushLocalValueMap();
+
+ return lowerCall(Call);
+}
+
+bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ // At -O0 we don't care about the lifetime intrinsics.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // The donothing intrinsic does, well, nothing.
+ case Intrinsic::donothing:
+ // Neither does the sideeffect intrinsic.
+ case Intrinsic::sideeffect:
+ // Neither does the assume intrinsic; it's also OK not to codegen its operand.
+ case Intrinsic::assume:
+ return true;
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
+ assert(DI->getVariable() && "Missing variable");
+ if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ const Value *Address = DI->getAddress();
+ if (!Address || isa<UndefValue>(Address)) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ // Byval arguments with frame indices were already handled after argument
+ // lowering and before isel.
+ const auto *Arg =
+ dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
+ if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
+ return true;
+
+ Optional<MachineOperand> Op;
+ if (unsigned Reg = lookUpRegForValue(Address))
+ Op = MachineOperand::CreateReg(Reg, false);
+
+ // If we have a VLA that has a "use" in a metadata node that's then used
+ // here but it has no other uses, then we have a problem. E.g.,
+ //
+ // int foo (const int *x) {
+ // char a[*x];
+ // return 0;
+ // }
+ //
+ // If we assign 'a' a vreg and fast isel later on has to use the selection
+ // DAG isel, it will want to copy the value to the vreg. However, there are
+ // no uses, which runs counter to what selection DAG isel expects.
+ if (!Op && !Address->use_empty() && isa<Instruction>(Address) &&
+ (!isa<AllocaInst>(Address) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+ Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address),
+ false);
+
+ if (Op) {
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into a DBG_VALUE that dereferences that address via a DW_OP_deref in
+ // the expression (rather than via the IsIndirect flag).
+ auto *Expr = DI->getExpression();
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false,
+ *Op, DI->getVariable(), Expr);
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ }
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst *DI = cast<DbgValueInst>(II);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ const Value *V = DI->getValue();
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
+ DI->getVariable(), DI->getExpression());
+ } else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addCImm(CI)
+ .addReg(0U)
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addImm(CI->getZExtValue())
+ .addReg(0U)
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addFPImm(CF)
+ .addReg(0U)
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ } else if (unsigned Reg = lookUpRegForValue(V)) {
+ // FIXME: This does not handle register-indirect values at offset 0.
+ bool IsIndirect = false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
+ DI->getVariable(), DI->getExpression());
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ }
+ return true;
+ }
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst *DI = cast<DbgLabelInst>(II);
+ assert(DI->getLabel() && "Missing label");
+ if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
+ return true;
+ }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ case Intrinsic::expect: {
+ unsigned ResultReg = getRegForValue(II->getArgOperand(0));
+ if (!ResultReg)
+ return false;
+ updateValueMap(II, ResultReg);
+ return true;
+ }
+ case Intrinsic::experimental_stackmap:
+ return selectStackmap(II);
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ return selectPatchpoint(II);
+
+ case Intrinsic::xray_customevent:
+ return selectXRayCustomEvent(II);
+ case Intrinsic::xray_typedevent:
+ return selectXRayTypedEvent(II);
+ }
+
+ return fastLowerIntrinsicCall(II);
+}
+
+bool FastISel::selectCast(const User *I, unsigned Opcode) {
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other ||
+ !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal.
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ // Check if the source operand is legal.
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ Opcode, InputReg, InputRegIsKill);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::selectBitCast(const User *I) {
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (I->getType() == I->getOperand(0)->getType()) {
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
+ return false;
+ updateValueMap(I, Reg);
+ return true;
+ }
+
+ // Bitcasts of other values become reg-reg copies or BITCAST operators.
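+ // For example, an i32 <-> f32 bitcast can be a plain COPY when both types
+ // map to the same register class; otherwise the target's BITCAST patterns
+ // are used (see below).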
+ EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstEVT = TLI.getValueType(DL, I->getType());
+ if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
+ !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DstVT = DstEVT.getSimpleVT();
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // First, try to perform the bitcast by inserting a reg-reg copy.
+ unsigned ResultReg = 0;
+ if (SrcVT == DstVT) {
+ const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
+ // Don't attempt a cross-class copy. It will likely fail.
+ if (SrcClass == DstClass) {
+ ResultReg = createResultReg(DstClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0);
+ }
+ }
+
+ // If the reg-reg copy failed, select a BITCAST opcode.
+ if (!ResultReg)
+ ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// Remove local value instructions starting from the instruction after
+// SavedLastLocalValue to the current function insert point.
+void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) {
+ MachineInstr *CurLastLocalValue = getLastLocalValue();
+ if (CurLastLocalValue != SavedLastLocalValue) {
+ // Find the first local value instruction to be deleted.
+ // This is the instruction after SavedLastLocalValue if it is non-NULL.
+ // Otherwise it's the first instruction in the block.
+ MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
+ if (SavedLastLocalValue)
+ ++FirstDeadInst;
+ else
+ FirstDeadInst = FuncInfo.MBB->getFirstNonPHI();
+ setLastLocalValue(SavedLastLocalValue);
+ removeDeadCode(FirstDeadInst, FuncInfo.InsertPt);
+ }
+}
+
+bool FastISel::selectInstruction(const Instruction *I) {
+ MachineInstr *SavedLastLocalValue = getLastLocalValue();
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (I->isTerminator()) {
+ if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
+ // PHI node handling may have generated local value instructions,
+ // even though it failed to handle all PHI nodes.
+ // We remove these instructions because SelectionDAGISel will generate
+ // them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
+ return false;
+ }
+ }
+
+ // FastISel does not handle any operand bundles except OB_funclet.
+ if (ImmutableCallSite CS = ImmutableCallSite(I))
+ for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
+ if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
+ return false;
+
+ DbgLoc = I->getDebugLoc();
+
+ SavedInsertPt = FuncInfo.InsertPt;
+
+ if (const auto *Call = dyn_cast<CallInst>(I)) {
+ const Function *F = Call->getCalledFunction();
+ LibFunc Func;
+
+ // As a special case, don't handle calls to builtin library functions that
+ // may be translated directly to target instructions.
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func))
+ return false;
+
+ // Don't handle Intrinsic::trap if a trap function is specified.
+ if (F && F->getIntrinsicID() == Intrinsic::trap &&
+ Call->hasFnAttr("trap-func-name"))
+ return false;
+ }
+
+ // First, try doing target-independent selection.
+ if (!SkipTargetIndependentISel) {
+ if (selectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
+ DbgLoc = DebugLoc();
+ return true;
+ }
+ // Remove dead code.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ SavedInsertPt = FuncInfo.InsertPt;
+ }
+ // Next, try calling the target to attempt to handle the instruction.
+ if (fastSelectInstruction(I)) {
+ ++NumFastIselSuccessTarget;
+ DbgLoc = DebugLoc();
+ return true;
+ }
+ // Remove dead code.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+
+ DbgLoc = DebugLoc();
+ // Undo phi node updates, because they will be added again by SelectionDAG.
+ if (I->isTerminator()) {
+ // PHI node handling may have generated local value instructions.
+ // We remove them because SelectionDAGISel will generate them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ }
+ return false;
+}
+
+/// Emit an unconditional branch to the given block, unless it is the immediate
+/// (fall-through) successor, and update the CFG.
+void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
+ const DebugLoc &DbgLoc) {
+ if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 &&
+ FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ // For more accurate line information, if this is the only non-debug
+ // instruction in the block then emit it; otherwise we have the
+ // unconditional fall-through case, which needs no instructions.
+ } else {
+ // The unconditional branch case.
+ TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr,
+ SmallVector<MachineOperand, 0>(), DbgLoc);
+ }
+ if (FuncInfo.BPI) {
+ auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
+ FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(MSucc, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(MSucc);
+}
+
+void FastISel::finishCondBranch(const BasicBlock *BranchBB,
+ MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB) {
+ // Add TrueMBB as successor unless it is equal to the FalseMBB: this can
+ // happen in degenerate IR, and MachineIR forbids having a block appear
+ // twice in the successor/predecessor lists.
+ if (TrueMBB != FalseMBB) {
+ if (FuncInfo.BPI) {
+ auto BranchProbability =
+ FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB);
+ }
+
+ fastEmitBranch(FalseMBB, DbgLoc);
+}
+
+/// Emit an FNeg operation.
+bool FastISel::selectFNeg(const User *I, const Value *In) {
+ unsigned OpReg = getRegForValue(In);
+ if (!OpReg)
+ return false;
+ bool OpRegIsKill = hasTrivialKill(In);
+
+ // If the target has ISD::FNEG, use it.
+ EVT VT = TLI.getValueType(DL, I->getType());
+ unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
+ OpReg, OpRegIsKill);
+ if (ResultReg) {
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Bitcast the value to integer, twiddle the sign bit with xor,
+ // and then bitcast it back to floating-point.
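+ // For example, an f64 negation becomes: bitcast f64 -> i64, xor the i64
+ // with 0x8000000000000000 (the sign-bit mask), bitcast i64 -> f64.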
+ if (VT.getSizeInBits() > 64)
+ return false;
+ EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+ if (!TLI.isTypeLegal(IntVT))
+ return false;
+
+ unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::BITCAST, OpReg, OpRegIsKill);
+ if (!IntReg)
+ return false;
+
+ unsigned IntResultReg = fastEmit_ri_(
+ IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true,
+ UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
+ if (!IntResultReg)
+ return false;
+
+ ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST,
+ IntResultReg, /*IsKill=*/true);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::selectExtractValue(const User *U) {
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI)
+ return false;
+
+ // Make sure we only try to handle extracts with a legal result. But also
+ // allow i1 because it's easy.
+ EVT RealVT = TLI.getValueType(DL, EVI->getType(), /*AllowUnknown=*/true);
+ if (!RealVT.isSimple())
+ return false;
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
+ return false;
+
+ const Value *Op0 = EVI->getOperand(0);
+ Type *AggTy = Op0->getType();
+
+ // Get the base result register.
+ unsigned ResultReg;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+ if (I != FuncInfo.ValueMap.end())
+ ResultReg = I->second;
+ else if (isa<Instruction>(Op0))
+ ResultReg = FuncInfo.InitializeRegForValue(Op0);
+ else
+ return false; // fast-isel can't handle aggregate constants at the moment
+
+ // Get the actual result register, which is an offset from the base register.
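+ // For example, extracting element 1 of an {i64, i32} aggregate adds the
+ // number of registers used for the i64 member at index 0 to the base.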
+ unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, DL, AggTy, AggValueVTs);
+
+ for (unsigned i = 0; i < VTIndex; i++)
+ ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+ updateValueMap(EVI, ResultReg);
+ return true;
+}
+
+bool FastISel::selectOperator(const User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return selectBinaryOp(I, ISD::ADD);
+ case Instruction::FAdd:
+ return selectBinaryOp(I, ISD::FADD);
+ case Instruction::Sub:
+ return selectBinaryOp(I, ISD::SUB);
+ case Instruction::FSub: {
+ // FNeg is currently represented in LLVM IR as a special case of FSub.
+ Value *X;
+ if (match(I, m_FNeg(m_Value(X))))
+ return selectFNeg(I, X);
+ return selectBinaryOp(I, ISD::FSUB);
+ }
+ case Instruction::Mul:
+ return selectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return selectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return selectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return selectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return selectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return selectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return selectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return selectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return selectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return selectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return selectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return selectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return selectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return selectBinaryOp(I, ISD::XOR);
+
+ case Instruction::FNeg:
+ return selectFNeg(I, I->getOperand(0));
+
+ case Instruction::GetElementPtr:
+ return selectGetElementPtr(I);
+
+ case Instruction::Br: {
+ const BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ fastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
+ // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ if (TM.Options.TrapUnreachable)
+ return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
+ else
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ // On AIX, call lowering uses the DAG-ISEL path currently so that the
+ // callee of the direct function call instruction will be mapped to the
+ // symbol for the function's entry point, which is distinct from the
+ // function descriptor symbol. The latter is the symbol whose XCOFF symbol
+ // name is the C-linkage name of the source level function.
+ if (TM.getTargetTriple().isOSAIX())
+ return false;
+ return selectCall(I);
+
+ case Instruction::BitCast:
+ return selectBitCast(I);
+
+ case Instruction::FPToSI:
+ return selectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return selectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return selectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return selectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return selectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return selectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return selectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
+ return false;
+ updateValueMap(I, Reg);
+ return true;
+ }
+
+ case Instruction::ExtractValue:
+ return selectExtractValue(I);
+
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo,
+ bool SkipTargetIndependentISel)
+ : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
+ MFI(FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()),
+ TII(*MF->getSubtarget().getInstrInfo()),
+ TLI(*MF->getSubtarget().getTargetLowering()),
+ TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
+ SkipTargetIndependentISel(SkipTargetIndependentISel) {}
+
+FastISel::~FastISel() = default;
+
+bool FastISel::fastLowerArguments() { return false; }
+
+bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; }
+
+bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) {
+ return false;
+}
+
+unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; }
+
+unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, unsigned /*Op1*/,
+ bool /*Op1IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_f(MVT, MVT, unsigned,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// This method is a wrapper around fastEmit_ri. It first tries to emit an
+/// instruction with an immediate operand using fastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// fastEmit_rr instead.
+unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
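+ // mul x, 8 -> shl x, 3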
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+ // Horrible hack (to be removed): check to make sure shift amounts are
+ // in range.
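+ // For example, an i32 shifted by 32 yields an undefined (poison) value in
+ // LLVM IR, so bail out here and let SelectionDAG handle it.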
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg)
+ return ResultReg;
+ unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ bool IsImmKill = true;
+ if (!MaterialReg) {
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ IntegerType *ITy =
+ IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ if (!MaterialReg)
+ return 0;
+ // FIXME: If the materialized register here has no uses yet then this
+ // will be the first use and we should be able to mark it as killed.
+ // However, the local value area for materializing constant expressions
+ // grows down, not up, which means that any constant expressions we generate
+ // later which also use 'Imm' could be after this instruction and therefore
+ // after this kill.
+ IsImmKill = false;
+ }
+ return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill);
+}
+
+unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
+ unsigned OpNum) {
+ if (Register::isVirtualRegister(Op)) {
+ const TargetRegisterClass *RegClass =
+ TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
+ if (!MRI.constrainRegClass(Op, RegClass)) {
+ // If it's not legal to COPY between the register classes, something
+ // has gone very wrong before we got here.
+ unsigned NewOp = createResultReg(RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), NewOp).addReg(Op);
+ return NewOp;
+ }
+ }
+ return Op;
+}
+
+unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill));
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill, unsigned Op2,
+ bool Op2IsKill) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+ Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill));
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm1,
+ uint64_t Imm2) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ const ConstantFP *FPImm) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addFPImm(FPImm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addFPImm(FPImm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill, uint64_t Imm) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
+ bool Op0IsKill, uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(Register::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ const TargetRegisterClass *RC = MRI.getRegClass(Op0);
+ MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx);
+ return ResultReg;
+}
+
+/// Emit MachineInstrs to compute the value of Op with all but the least
+/// significant bit set to zero.
+unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
+}
+
+/// handlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+/// Emit code to ensure constants are copied into registers when needed.
+/// Remember the virtual registers that need to be added to the Machine PHI
+/// nodes as input. We cannot just directly add them, because expansion
+/// might result in multiple MBBs for one BB. As such, the start of the
+/// BB might correspond to a different MBB than the end.
+bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const Instruction *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin()))
+ continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB).second)
+ continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (const PHINode &PN : SuccBB->phis()) {
+ // Ignore dead phi's.
+ if (PN.use_empty())
+ continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+ // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegs to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
+
+ // Set the DebugLoc for the copy. Prefer the location of the operand
+ // if there is one; use the location of the PHI otherwise.
+ DbgLoc = PN.getDebugLoc();
+ if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
+ DbgLoc = Inst->getDebugLoc();
+
+ unsigned Reg = getRegForValue(PHIOp);
+ if (!Reg) {
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ return false;
+ }
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg));
+ DbgLoc = DebugLoc();
+ }
+ }
+
+ return true;
+}
+
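+/// The target may be able to fold a single-use load into the instruction
+/// that consumes it, e.g. turning "%y = load %p; %z = add %x, %y" into a
+/// single memory-operand add on targets (such as x86) that support it.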
+bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
+ assert(LI->hasOneUse() &&
+ "tryToFoldLoad expected a LoadInst with a single use");
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->user_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->user_back();
+ }
+
+ // If we didn't find the fold instruction, then we failed to collapse the
+ // sequence.
+ if (TheUser != FoldInst)
+ return false;
+
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile())
+ return false;
+
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
+ unsigned LoadReg = getRegForValue(LI);
+ if (!LoadReg)
+ return false;
+
+ // We can't fold if this vreg has no uses or more than one use. Multiple uses
+ // may mean that the instruction got lowered to multiple MIs, or the use of
+ // the loaded value ended up being multiple operands of the result.
+ if (!MRI.hasOneUse(LoadReg))
+ return false;
+
+ MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
+ MachineInstr *User = RI->getParent();
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo.InsertPt = User;
+ FuncInfo.MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
+}
+
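+/// A GEP whose pointer operand is an add of a value and a constant, e.g.
+/// "getelementptr (add %base, 16), ...", can sometimes be selected as a
+/// single addressing-mode computation; this checks the target-independent
+/// preconditions: matching type sizes, same basic block, and a constant
+/// right-hand operand.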
+bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
+ // Must be an add.
+ if (!isa<AddOperator>(Add))
+ return false;
+ // Type size needs to match.
+ if (DL.getTypeSizeInBits(GEP->getType()) !=
+ DL.getTypeSizeInBits(Add->getType()))
+ return false;
+ // Must be in the same basic block.
+ if (isa<Instruction>(Add) &&
+ FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB)
+ return false;
+ // Must have a constant operand.
+ return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
+}
+
+MachineMemOperand *
+FastISel::createMachineMemOperandFor(const Instruction *I) const {
+ const Value *Ptr;
+ Type *ValTy;
+ unsigned Alignment;
+ MachineMemOperand::Flags Flags;
+ bool IsVolatile;
+
+ if (const auto *LI = dyn_cast<LoadInst>(I)) {
+ Alignment = LI->getAlignment();
+ IsVolatile = LI->isVolatile();
+ Flags = MachineMemOperand::MOLoad;
+ Ptr = LI->getPointerOperand();
+ ValTy = LI->getType();
+ } else if (const auto *SI = dyn_cast<StoreInst>(I)) {
+ Alignment = SI->getAlignment();
+ IsVolatile = SI->isVolatile();
+ Flags = MachineMemOperand::MOStore;
+ Ptr = SI->getPointerOperand();
+ ValTy = SI->getValueOperand()->getType();
+ } else
+ return nullptr;
+
+ bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal);
+ bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load);
+ bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);
+ const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
+
+ AAMDNodes AAInfo;
+ I->getAAMetadata(AAInfo);
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0.
+ Alignment = DL.getABITypeAlignment(ValTy);
+
+ unsigned Size = DL.getTypeStoreSize(ValTy);
+
+ if (IsVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (IsNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+ if (IsDereferenceable)
+ Flags |= MachineMemOperand::MODereferenceable;
+ if (IsInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
+
+ return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
+ Alignment, AAInfo, Ranges);
+}
+
+CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const {
+ // If both operands are the same, then try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (CI->getOperand(0) != CI->getOperand(1))
+ return Predicate;
+
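+ // With identical operands, a floating-point compare reduces to a NaN
+ // check: e.g. "fcmp oeq x, x" is true iff x is not NaN, i.e. FCMP_ORD,
+ // while "fcmp ueq x, x" is always true. Integer compares with identical
+ // operands fold to always-true or always-false, denoted here by
+ // FCMP_TRUE/FCMP_FALSE.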
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid predicate!");
+ case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break;
+
+ case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
+ }
+
+ return Predicate;
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 0000000000000..cf6711adad48d
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,546 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "function-lowering-info"
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+ if (I->use_empty()) return false;
+ if (isa<PHINode>(I)) return true;
+ const BasicBlock *BB = I->getParent();
+ for (const User *U : I->users())
+ if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
+ return true;
+
+ return false;
+}
+
+static ISD::NodeType getPreferredExtendForValue(const Value *V) {
+ // If the source value is used by compare instructions, and the number of
+ // signed predicates among those users is greater than the number of
+ // unsigned predicates, we prefer to use SIGN_EXTEND.
+ //
+ // With this optimization, we would be able to remove some redundant sign
+ // and zero extension instructions, and eventually expose more machine CSE
+ // opportunities.
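+ // For example, a value compared twice with "icmp slt" and once with
+ // "icmp ult" would be assigned ISD::SIGN_EXTEND.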
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ unsigned NumOfSigned = 0, NumOfUnsigned = 0;
+ for (const User *U : V->users()) {
+ if (const auto *CI = dyn_cast<CmpInst>(U)) {
+ NumOfSigned += CI->isSigned();
+ NumOfUnsigned += CI->isUnsigned();
+ }
+ }
+ if (NumOfSigned > NumOfUnsigned)
+ ExtendKind = ISD::SIGN_EXTEND;
+
+ return ExtendKind;
+}
+
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
+ SelectionDAG *DAG) {
+ Fn = &fn;
+ MF = &mf;
+ TLI = MF->getSubtarget().getTargetLowering();
+ RegInfo = &MF->getRegInfo();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ unsigned StackAlign = TFI->getStackAlignment();
+ DA = DAG->getDivergenceAnalysis();
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ CallingConv::ID CC = Fn->getCallingConv();
+
+ GetReturnInfo(CC, Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI,
+ mf.getDataLayout());
+ CanLowerReturn =
+ TLI->CanLowerReturn(CC, *MF, Fn->isVarArg(), Outs, Fn->getContext());
+
+ // If this personality uses funclets, we need to do a bit more work.
+ DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects;
+ EHPersonality Personality = classifyEHPersonality(
+ Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr);
+ if (isFuncletEHPersonality(Personality)) {
+ // Calculate state numbers if we haven't already.
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+ if (Personality == EHPersonality::MSVC_CXX)
+ calculateWinCXXEHStateNumbers(&fn, EHInfo);
+ else if (isAsynchronousEHPersonality(Personality))
+ calculateSEHStateNumbers(&fn, EHInfo);
+ else if (Personality == EHPersonality::CoreCLR)
+ calculateClrEHStateNumbers(&fn, EHInfo);
+
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (const AllocaInst *AI = H.CatchObj.Alloca)
+ CatchObjects.insert({AI, {}}).first->second.push_back(
+ &H.CatchObj.FrameIndex);
+ else
+ H.CatchObj.FrameIndex = INT_MAX;
+ }
+ }
+ }
+ if (Personality == EHPersonality::Wasm_CXX) {
+ WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
+ calculateWasmEHInfo(&fn, EHInfo);
+ }
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ for (const BasicBlock &BB : *Fn) {
+ for (const Instruction &I : BB) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ Type *Ty = AI->getAllocatedType();
+ unsigned Align =
+ std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ // Static allocas can be folded into the initial stack frame
+ // adjustment. For targets that don't realign the stack, don't
+ // do this if there is an extra alignment requirement.
+ if (AI->isStaticAlloca() &&
+ (TFI->isStackRealignable() || (Align <= StackAlign))) {
+ const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
+ uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+ int FrameIndex = INT_MAX;
+ auto Iter = CatchObjects.find(AI);
+ if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
+ FrameIndex = MF->getFrameInfo().CreateFixedObject(
+ TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true);
+ MF->getFrameInfo().setObjectAlignment(FrameIndex, Align);
+ } else {
+ FrameIndex =
+ MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI);
+ }
+
+ StaticAllocaMap[AI] = FrameIndex;
+ // Update the catch handler information.
+ if (Iter != CatchObjects.end()) {
+ for (int *CatchObjPtr : Iter->second)
+ *CatchObjPtr = FrameIndex;
+ }
+ } else {
+ // FIXME: Overaligned static allocas should be grouped into
+ // a single dynamic allocation instead of using a separate
+ // stack allocation for each one.
+ if (Align <= StackAlign)
+ Align = 0;
+ // Inform the Frame Information that we have variable-sized objects.
+ MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, AI);
+ }
+ }
+
+ // Look for inline asm that clobbers the SP register.
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ ImmutableCallSite CS(&I);
+ if (isa<InlineAsm>(CS.getCalledValue())) {
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ std::vector<TargetLowering::AsmOperandInfo> Ops =
+ TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS);
+ for (TargetLowering::AsmOperandInfo &Op : Ops) {
+ if (Op.Type == InlineAsm::isClobber) {
+ // Clobbers don't have SDValue operands, hence SDValue().
+ TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
+ std::pair<unsigned, const TargetRegisterClass *> PhysReg =
+ TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode,
+ Op.ConstraintVT);
+ if (PhysReg.first == SP)
+ MF->getFrameInfo().setHasOpaqueSPAdjustment(true);
+ }
+ }
+ }
+ }
+
+ // Look for calls to the @llvm.va_start intrinsic. We can omit some
+ // prologue boilerplate for variadic functions that don't examine their
+ // arguments.
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ MF->getFrameInfo().setHasVAStart(true);
+ }
+
+ // If we have a musttail call in a variadic function, we need to ensure we
+ // forward implicit register parameters.
+ if (const auto *CI = dyn_cast<CallInst>(&I)) {
+ if (CI->isMustTailCall() && Fn->isVarArg())
+ MF->getFrameInfo().setHasMustTailInVarArgFunc(true);
+ }
+
+ // Mark values used outside their block as exported, by allocating
+ // a virtual register for them.
+ if (isUsedOutsideOfDefiningBlock(&I))
+ if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I)))
+ InitializeRegForValue(&I);
+
+ // Decide the preferred extend type for a value.
+ PreferredExtendType[&I] = getPreferredExtendForValue(&I);
+ }
+ }
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (const BasicBlock &BB : *Fn) {
+ // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks
+ // are really data, and no instructions can live here.
+ if (BB.isEHPad()) {
+ const Instruction *PadInst = BB.getFirstNonPHI();
+ // If this is a non-landingpad EH pad, mark this function as using
+ // funclets.
+ // FIXME: SEH catchpads do not create EH scope/funclets, so we could avoid
+ // setting this in such cases in order to improve frame layout.
+ if (!isa<LandingPadInst>(PadInst)) {
+ MF->setHasEHScopes(true);
+ MF->setHasEHFunclets(true);
+ MF->getFrameInfo().setHasOpaqueSPAdjustment(true);
+ }
+ if (isa<CatchSwitchInst>(PadInst)) {
+ assert(&*BB.begin() == PadInst &&
+ "WinEHPrepare failed to remove PHIs from imaginary BBs");
+ continue;
+ }
+ if (isa<FuncletPadInst>(PadInst))
+ assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs");
+ }
+
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&BB);
+ MBBMap[&BB] = MBB;
+ MF->push_back(MBB);
+
+ // Transfer the address-taken flag. This is necessary because there could
+ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+ // the first one should be marked.
+ if (BB.hasAddressTaken())
+ MBB->setHasAddressTaken();
+
+ // Mark landing pad blocks.
+ if (BB.isEHPad())
+ MBB->setIsEHPad();
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ for (const PHINode &PN : BB.phis()) {
+ if (PN.use_empty())
+ continue;
+
+ // Skip empty types
+ if (PN.getType()->isEmptyTy())
+ continue;
+
+ DebugLoc DL = PN.getDebugLoc();
+ unsigned PHIReg = ValueMap[&PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, MF->getDataLayout(), PN.getType(), ValueVTs);
+ for (EVT VT : ValueVTs) {
+ unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+
+ if (isFuncletEHPersonality(Personality)) {
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (H.Handler)
+ H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
+ }
+ }
+ for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
+ if (UME.Cleanup)
+ UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
+ for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
+ const auto *BB = UME.Handler.get<const BasicBlock *>();
+ UME.Handler = MBBMap[BB];
+ }
+ for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
+ const auto *BB = CME.Handler.get<const BasicBlock *>();
+ CME.Handler = MBBMap[BB];
+ }
+ } else if (Personality == EHPersonality::Wasm_CXX) {
+ WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
+ // Map all BB references in the Wasm EH data to MBBs.
+ DenseMap<BBOrMBB, BBOrMBB> NewMap;
+ for (auto &KV : EHInfo.EHPadUnwindMap) {
+ const auto *Src = KV.first.get<const BasicBlock *>();
+ const auto *Dst = KV.second.get<const BasicBlock *>();
+ NewMap[MBBMap[Src]] = MBBMap[Dst];
+ }
+ EHInfo.EHPadUnwindMap = std::move(NewMap);
+ }
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+ MBBMap.clear();
+ ValueMap.clear();
+ VirtReg2Value.clear();
+ StaticAllocaMap.clear();
+ LiveOutRegInfo.clear();
+ VisitedBBs.clear();
+ ArgDbgValues.clear();
+ DescribedArgs.clear();
+ ByValArgFrameIndexMap.clear();
+ RegFixups.clear();
+ RegsWithFixups.clear();
+ StatepointStackSlots.clear();
+ StatepointSpillMaps.clear();
+ PreferredExtendType.clear();
+}
+
+/// CreateReg - Allocate a single virtual register for the given type.
+unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
+ return RegInfo->createVirtualRegister(
+ MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
+}
+
+/// CreateRegs - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
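+/// For example, a value of type {i64, i64} on a target that splits i64 into
+/// two i32 registers is assigned four consecutive virtual registers.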
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
+
+ unsigned FirstReg = 0;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT);
+
+ unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ unsigned R = CreateReg(RegisterVT, isDivergent);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
+
+unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
+ return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) &&
+ DA->isDivergent(V));
+}
+
+/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
+/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
+/// the register's LiveOutInfo is for a smaller bit width, it is any-extended
+/// to the larger bit width: the new high bits are treated as unknown and the
+/// sign-bit count is reset. The requested bit width must be no smaller than
+/// the LiveOutInfo's existing bit width.
+const FunctionLoweringInfo::LiveOutInfo *
+FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+ if (!LiveOutRegInfo.inBounds(Reg))
+ return nullptr;
+
+ LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
+ if (!LOI->IsValid)
+ return nullptr;
+
+ if (BitWidth > LOI->Known.getBitWidth()) {
+ LOI->NumSignBits = 1;
+ LOI->Known = LOI->Known.zext(BitWidth, false /* => any extend */);
+ }
+
+ return LOI;
+}
+
+/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
+/// register based on the LiveOutInfo of its operands.
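+/// For example, merging incoming constants 0b0101 and 0b0100 leaves
+/// Known.One = 0b0100 and Known.Zero = 0b1010, so only bit 0 is unknown.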
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+ Type *Ty = PN->getType();
+ if (!Ty->isIntegerTy() || Ty->isVectorTy())
+ return;
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
+ assert(ValueVTs.size() == 1 &&
+ "PHIs with non-vector integer types should have a single VT.");
+ EVT IntVT = ValueVTs[0];
+
+ if (TLI->getNumRegisters(PN->getContext(), IntVT) != 1)
+ return;
+ IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
+ unsigned BitWidth = IntVT.getSizeInBits();
+
+ unsigned DestReg = ValueMap[PN];
+ if (!Register::isVirtualRegister(DestReg))
+ return;
+ LiveOutRegInfo.grow(DestReg);
+ LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+ Value *V = PN->getIncomingValue(0);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ DestLOI.Known = KnownBits(BitWidth);
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = Val.getNumSignBits();
+ DestLOI.Known.Zero = ~Val;
+ DestLOI.Known.One = Val;
+ } else {
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!Register::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI = *SrcLOI;
+ }
+
+ assert(DestLOI.Known.Zero.getBitWidth() == BitWidth &&
+ DestLOI.Known.One.getBitWidth() == BitWidth &&
+ "Masks should have the same bit width as the type.");
+
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ DestLOI.Known = KnownBits(BitWidth);
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+ DestLOI.Known.Zero &= ~Val;
+ DestLOI.Known.One &= Val;
+ continue;
+ }
+
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!Register::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+ DestLOI.Known.Zero &= SrcLOI->Known.Zero;
+ DestLOI.Known.One &= SrcLOI->Known.One;
+ }
+}
+
+/// setArgumentFrameIndex - Record the frame index for the byval argument,
+/// overriding any previous frame index entry for this argument.
+void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
+ int FI) {
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getArgumentFrameIndex - Get the frame index for the byval argument.
+/// If the argument does not have an assigned frame index, INT_MAX is
+/// returned.
+int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
+ auto I = ByValArgFrameIndexMap.find(A);
+ if (I != ByValArgFrameIndexMap.end())
+ return I->second;
+ LLVM_DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
+ return INT_MAX;
+}
+
+unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
+ const Value *CPI, const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ auto I = CatchPadExceptionPointers.insert({CPI, 0});
+ unsigned &VReg = I.first->second;
+ if (I.second)
+ VReg = MRI.createVirtualRegister(RC);
+ assert(VReg && "null vreg in exception pointer table!");
+ return VReg;
+}
+
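+/// getValueFromVirtualReg - Return the IR value assigned to the given
+/// virtual register, lazily building the reverse of ValueMap on first use.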
+const Value *
+FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
+ if (VirtReg2Value.empty()) {
+ SmallVector<EVT, 4> ValueVTs;
+ for (auto &P : ValueMap) {
+ ValueVTs.clear();
+ ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(),
+ P.first->getType(), ValueVTs);
+ unsigned Reg = P.second;
+ for (EVT VT : ValueVTs) {
+ unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ VirtReg2Value[Reg++] = P.first;
+ }
+ }
+ }
+ return VirtReg2Value.lookup(Vreg);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 0000000000000..c5095995ec2e0
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,1165 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "instr-emitter"
+
+/// MinRCSize - Smallest register class we allow when constraining virtual
+/// registers. If satisfying all register class constraints would require
+/// using a smaller register class, emit a COPY to a new virtual register
+/// instead.
+const unsigned MinRCSize = 4;
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional glue operands (which do
+/// not go into the resulting MachineInstr).
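+/// For example, a node with the value list (i32, i32, Other, Glue) has two
+/// countable results; the trailing chain and glue are skipped.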
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
+
+/// countOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional glue operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+///
+/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
+/// the chain and glue. These operands may be implicit on the machine instr.
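+/// For example, a call-like node whose operands end in (..., physreg,
+/// regmask, chain, glue) excludes the chain and glue from the returned count
+/// and reports the trailing physreg/regmask run through NumImpUses.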
+static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
+ unsigned &NumImpUses) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+
+ // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
+ NumImpUses = N - NumExpUses;
+ for (unsigned I = N; I > NumExpUses; --I) {
+ if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
+ continue;
+ if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
+ if (Register::isPhysicalRegister(RN->getReg()))
+ continue;
+ NumImpUses = N - I;
+ break;
+ }
+
+ return N;
+}
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+ unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (Register::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = nullptr;
+ MVT VT = Node->getSimpleValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
+
+ if (!IsClone && !IsCloned)
+ for (SDNode *User : Node->uses()) {
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (Register::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ MVT VT = Node->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Glue)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = nullptr;
+ if (i+II.getNumDefs() < II.getNumOperands()) {
+ RC = TRI->getAllocatableClass(
+ TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
+ }
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC =
+ TRI->getCommonSubClass(UseRC, RC);
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr;
+ SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI->getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(TRI->isTypeLegalForClass(*UseRC, VT) &&
+ "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else {
+ DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
+ }
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ VRBase).addReg(SrcReg);
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ unsigned NumResults = CountResults(Node);
+ for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ unsigned VRBase = 0;
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
+ // Always let the value type influence the used register class. The
+ // constraints on the instruction may be too lax to represent the value
+ // type correctly. For example, a 64-bit float (X86::FR64) can't live in
+ // the 32-bit float super-class (X86::FR32).
+ if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
+ const TargetRegisterClass *VTRC = TLI->getRegClassFor(
+ Node->getSimpleValueType(i),
+ (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC))));
+ if (RC)
+ VTRC = TRI->getCommonSubClass(RC, VTRC);
+ if (VTRC)
+ RC = VTRC;
+ }
+
+ if (II.OpInfo[i].isOptionalDef()) {
+ // Optional def must be a physical register.
+ VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+ assert(Register::isPhysicalRegister(VRBase));
+ MIB.addReg(VRBase, RegState::Define);
+ }
+
+ if (!VRBase && !IsClone && !IsCloned)
+ for (SDNode *User : Node->uses()) {
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (Register::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MIB.addReg(VRBase, RegState::Define);
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI->createVirtualRegister(RC);
+ MIB.addReg(VRBase, RegState::Define);
+ }
+
+ // If this def corresponds to a result of the SDNode, insert the VRBase
+ // into the lookup map.
+ if (i < NumResults) {
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
+ // does not include operand register class info.
+ const TargetRegisterClass *RC = TLI->getRegClassFor(
+ Op.getSimpleValueType(), Op.getNode()->isDivergent());
+ Register VReg = MRI->createVirtualRegister(RC);
+ BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+
+ const MCInstrDesc &MCID = MIB->getDesc();
+ bool isOptDef = IIOpNum < MCID.getNumOperands() &&
+ MCID.OpInfo[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it, but first attempt to
+ // shrink VReg's register class within reason. For example, if VReg == GR32
+ // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP.
+ if (II) {
+ const TargetRegisterClass *OpRC = nullptr;
+ if (IIOpNum < II->getNumOperands())
+ OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF);
+
+ if (OpRC) {
+ const TargetRegisterClass *ConstrainedRC
+ = MRI->constrainRegClass(VReg, OpRC, MinRCSize);
+ if (!ConstrainedRC) {
+ OpRC = TRI->getAllocatableClass(OpRC);
+ assert(OpRC && "Constraints cannot be fulfilled for allocation");
+ Register NewVReg = MRI->createVirtualRegister(OpRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ } else {
+ assert(ConstrainedRC->isAllocatable() &&
+ "Constraining an allocatable VReg produced an unallocatable class?");
+ }
+ }
+ }
+
+ // If this value has only one use, that use is a kill. This is a
+ // conservative approximation. InstrEmitter does trivial coalescing
+ // with CopyFromReg nodes, so don't emit kill flags for them.
+ // Avoid kill flags on schedule-cloned nodes, since there will be
+ // multiple uses.
+ // Tied operands are never killed, so checking for that requires
+ // determining the index of the operand first.
+ bool isKill = Op.hasOneUse() &&
+ Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+ !IsDebug &&
+ !(IsClone || IsCloned);
+ if (isKill) {
+ unsigned Idx = MIB->getNumOperands();
+ while (Idx > 0 &&
+ MIB->getOperand(Idx-1).isReg() &&
+ MIB->getOperand(Idx-1).isImplicit())
+ --Idx;
+ bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
+ if (isTied)
+ isKill = false;
+ }
+
+ MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) |
+ getDebugRegState(IsDebug));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding.
+void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MIB.addImm(C->getSExtValue());
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ MIB.addFPImm(F->getConstantFPValue());
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ unsigned VReg = R->getReg();
+ MVT OpVT = Op.getSimpleValueType();
+ const TargetRegisterClass *IIRC =
+ II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
+ : nullptr;
+ const TargetRegisterClass *OpRC =
+ TLI->isTypeLegal(OpVT)
+ ? TLI->getRegClassFor(OpVT,
+ Op.getNode()->isDivergent() ||
+ (IIRC && TRI->isDivergentRegClass(IIRC)))
+ : nullptr;
+
+ if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) {
+ Register NewVReg = MRI->createVirtualRegister(IIRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ }
+ // Turn additional physreg operands into implicit uses on non-variadic
+ // instructions. This is used by call and return instructions passing
+ // arguments in registers.
+ bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
+ MIB.addReg(VReg, getImplRegState(Imp));
+ } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
+ MIB.addRegMask(RM->getRegMask());
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags());
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MIB.addMBB(BBNode->getBasicBlock());
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MIB.addFrameIndex(FI->getIndex());
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags());
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = MF->getDataLayout().getPrefTypeAlignment(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = MF->getDataLayout().getTypeAllocSize(Type);
+ }
+ }
+
+ unsigned Idx;
+ MachineConstantPool *MCP = MF->getConstantPool();
+ if (CP->isMachineConstantPoolEntry())
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+ MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
+ } else if (auto *SymNode = dyn_cast<MCSymbolSDNode>(Op)) {
+ MIB.addSym(SymNode->getMCSymbol());
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ MIB.addBlockAddress(BA->getBlockAddress(),
+ BA->getOffset(),
+ BA->getTargetFlags());
+ } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
+ MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags());
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ }
+}
+
+unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+ MVT VT, bool isDivergent, const DebugLoc &DL) {
+ const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
+
+ // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg
+ // within reason.
+ if (RC && RC != VRC)
+ RC = MRI->constrainRegClass(VReg, RC, MinRCSize);
+
+ // VReg has been adjusted. It can be used with SubIdx operands now.
+ if (RC)
+ return VReg;
+
+ // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
+ // register instead.
+ RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
+ assert(RC && "No legal register class for VT supports that SubIdx");
+ Register NewReg = MRI->createVirtualRegister(RC);
+ BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(VReg);
+ return NewReg;
+}
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned VRBase = 0;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode *User : Node->uses()) {
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (Register::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no
+ // constraints on the %dst register, COPY can target all legal register
+ // classes.
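+ // For example, extracting the low 32 bits of a 64-bit value on X86 becomes
+ // %dst:gr32 = COPY %src.sub_32bit.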
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *TRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
+
+ unsigned Reg;
+ MachineInstr *DefMI;
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
+ if (R && Register::isPhysicalRegister(R->getReg())) {
+ Reg = R->getReg();
+ DefMI = nullptr;
+ } else {
+ Reg = R ? R->getReg() : getVR(Node->getOperand(0), VRBaseMap);
+ DefMI = MRI->getVRegDef(Reg);
+ }
+
+ unsigned SrcReg, DstReg, DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx &&
+ TRC == MRI->getRegClass(SrcReg)) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ } else {
+ // Reg may not support a SubIdx sub-register, and we may need to
+ // constrain its register class or issue a COPY to a compatible register
+ // class.
+ if (Register::isVirtualRegister(Reg))
+ Reg = ConstrainForSubReg(Reg, SubIdx,
+ Node->getOperand(0).getSimpleValueType(),
+ Node->isDivergent(), Node->getDebugLoc());
+ // Create the destreg if it is missing.
+ if (VRBase == 0)
+ VRBase = MRI->createVirtualRegister(TRC);
+
+ // Create the extract_subreg machine instruction.
+ MachineInstrBuilder CopyMI =
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
+ if (Register::isVirtualRegister(Reg))
+ CopyMI.addReg(Reg, 0, SubIdx);
+ else
+ CopyMI.addReg(TRI->getSubReg(Reg, SubIdx));
+ }
+ } else if (Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Figure out the register class to create for the destreg. It should be
+ // the largest legal register class supporting SubIdx sub-registers.
+ // RegisterCoalescer will constrain it further if it decides to eliminate
+ // the INSERT_SUBREG instruction.
+ //
+ // %dst = INSERT_SUBREG %src, %sub, SubIdx
+ //
+ // is lowered by TwoAddressInstructionPass to:
+ //
+ // %dst = COPY %src
+ // %dst:SubIdx = COPY %sub
+ //
+ // There is no constraint on the %src register class.
+ //
+ const TargetRegisterClass *SRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
+ SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
+ assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
+
+ if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase)))
+ VRBase = MRI->createVirtualRegister(SRC);
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase);
+
+ // If creating a subreg_to_reg, then the first input operand
+ // is an implicit value immediate, otherwise it's a register
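+ // The immediate asserts the value of the bits outside SubIdx; e.g. an
+ // immediate of 0 states that the bits not covered by SubIdx are zero.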
+ if (Opc == TargetOpcode::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MIB.addImm(SD->getZExtValue());
+ } else
+ AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ // Add the subregister being inserted
+ AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ MIB.addImm(SubIdx);
+ MBB->insert(InsertPos, MIB);
+ } else
+ llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC =
+ TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
+ Register NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg).addReg(VReg);
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+///
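+/// A REG_SEQUENCE node carries the register class index as operand 0,
+/// followed by (value, subreg-index) pairs, which is why the operand count
+/// checked below must be odd, e.g.:
+///   REG_SEQUENCE RCId, %lo, sub0, %hi, sub1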
+void InstrEmitter::EmitRegSequence(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
+ unsigned NumOps = Node->getNumOperands();
+ // If the input pattern has a chain, then the root of the corresponding
+ // output pattern will get a chain as well. This can happen to be a
+ // REG_SEQUENCE (which is not "guarded" by countOperands/CountResults).
+ if (NumOps && Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ --NumOps; // Ignore chain if it exists.
+
+ assert((NumOps & 1) == 1 &&
+ "REG_SEQUENCE must have an odd number of operands!");
+ for (unsigned i = 1; i != NumOps; ++i) {
+ SDValue Op = Node->getOperand(i);
+ if ((i & 1) == 0) {
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
+ // Skip physical registers as they don't have a vreg to get and we'll
+ // insert copies for them in TwoAddressInstructionPass anyway.
+ if (!R || !Register::isPhysicalRegister(R->getReg())) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
+ }
+ }
+ AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ }
+
+ MBB->insert(InsertPos, MIB);
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitDbgValue - Generate machine instruction for a dbg_value node.
+///
+MachineInstr *
+InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ MDNode *Var = SD->getVariable();
+ const DIExpression *Expr = SD->getExpression();
+ DebugLoc DL = SD->getDebugLoc();
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+
+ SD->setIsEmitted();
+
+ if (SD->isInvalidated()) {
+ // An invalidated SDNode must generate an undef DBG_VALUE: although the
+ // original value is no longer computed, earlier DBG_VALUEs' live ranges
+ // must not leak into later code.
+ auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
+ MIB.addReg(0U);
+ MIB.addReg(0U, RegState::Debug);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+ return &*MIB;
+ }
+
+ if (SD->getKind() == SDDbgValue::FRAMEIX) {
+ // Stack address; this needs to be lowered in target-dependent fashion.
+ // EmitTargetCodeForFrameDebugValue is responsible for allocation.
+ auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
+ .addFrameIndex(SD->getFrameIx());
+
+ if (SD->isIndirect())
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
+ FrameMI.addReg(0);
+ return FrameMI.addMetadata(Var).addMetadata(Expr);
+ }
+ // Otherwise, we're going to create an instruction here.
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ if (SD->getKind() == SDDbgValue::SDNODE) {
+ SDNode *Node = SD->getSDNode();
+ SDValue Op = SDValue(Node, SD->getResNo());
+ // It's possible we replaced this SDNode with other(s) and therefore
+ // didn't generate code for it. It's better to catch these cases where
+ // they happen and transfer the debug info, but trying to guarantee that
+ // in all cases would be very fragile; this is a safeguard for any
+ // that were missed.
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ if (I==VRBaseMap.end())
+ MIB.addReg(0U); // undef
+ else
+ AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } else if (SD->getKind() == SDDbgValue::VREG) {
+ MIB.addReg(SD->getVReg(), RegState::Debug);
+ } else if (SD->getKind() == SDDbgValue::CONST) {
+ const Value *V = SD->getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getSExtValue());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ MIB.addFPImm(CF);
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Note: This assumes that all nullptr constants are zero-valued.
+ MIB.addImm(0);
+ } else {
+ // Could be an Undef. In any case insert an Undef so we can see what we
+ // dropped.
+ MIB.addReg(0U);
+ }
+ } else {
+ // Insert an Undef so we can see what we dropped.
+ MIB.addReg(0U);
+ }
+
+ // Indirect addressing is indicated by an Imm as the second parameter.
+ if (SD->isIndirect())
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
+ MIB.addReg(0U, RegState::Debug);
+
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+
+ return &*MIB;
+}
+
+MachineInstr *
+InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) {
+ MDNode *Label = SD->getLabel();
+ DebugLoc DL = SD->getDebugLoc();
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_LABEL);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ MIB.addMetadata(Label);
+
+ return &*MIB;
+}
+
+/// EmitMachineNode - Generate machine code for a target-specific node and
+/// needed dependencies.
+///
+void InstrEmitter::
+EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle REG_SEQUENCE specially.
+ if (Opc == TargetOpcode::REG_SEQUENCE) {
+ EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ if (Opc == TargetOpcode::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const MCInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NumDefs = II.getNumDefs();
+ const MCPhysReg *ScratchRegs = nullptr;
+
+ // Handle STACKMAP and PATCHPOINT specially and then use the generic code.
+ if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+ // Stackmaps do not have arguments and do not preserve their calling
+ // convention. However, to simplify runtime support, they clobber the same
+ // scratch registers as AnyRegCC.
+ unsigned CC = CallingConv::AnyReg;
+ if (Opc == TargetOpcode::PATCHPOINT) {
+ CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
+ NumDefs = NumResults;
+ }
+ ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
+ }
+
+ unsigned NumImpUses = 0;
+ unsigned NodeOperands =
+ countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
+ bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ if (II.isVariadic())
+ assert(NumMIOperands >= II.getNumOperands() &&
+ "Too few operands for a variadic node!");
+ else
+ assert(NumMIOperands >= II.getNumOperands() &&
+ NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
+ NumImpUses &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults) {
+ CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);
+
+ // Transfer any IR flags from the SDNode to the MachineInstr
+ MachineInstr *MI = MIB.getInstr();
+ const SDNodeFlags Flags = Node->getFlags();
+ if (Flags.hasNoSignedZeros())
+ MI->setFlag(MachineInstr::MIFlag::FmNsz);
+
+ if (Flags.hasAllowReciprocal())
+ MI->setFlag(MachineInstr::MIFlag::FmArcp);
+
+ if (Flags.hasNoNaNs())
+ MI->setFlag(MachineInstr::MIFlag::FmNoNans);
+
+ if (Flags.hasNoInfs())
+ MI->setFlag(MachineInstr::MIFlag::FmNoInfs);
+
+ if (Flags.hasAllowContract())
+ MI->setFlag(MachineInstr::MIFlag::FmContract);
+
+ if (Flags.hasApproximateFuncs())
+ MI->setFlag(MachineInstr::MIFlag::FmAfn);
+
+ if (Flags.hasAllowReassociation())
+ MI->setFlag(MachineInstr::MIFlag::FmReassoc);
+
+ if (Flags.hasNoUnsignedWrap())
+ MI->setFlag(MachineInstr::MIFlag::NoUWrap);
+
+ if (Flags.hasNoSignedWrap())
+ MI->setFlag(MachineInstr::MIFlag::NoSWrap);
+
+ if (Flags.hasExact())
+ MI->setFlag(MachineInstr::MIFlag::IsExact);
+
+ if (Flags.hasFPExcept())
+ MI->setFlag(MachineInstr::MIFlag::FPExcept);
+ }
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ bool HasOptPRefs = NumDefs > NumResults;
+ assert((!HasOptPRefs || !HasPhysRegOuts) &&
+ "Unable to cope with optional defs and phys regs defs!");
+ unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0;
+ for (unsigned i = NumSkip; i != NodeOperands; ++i)
+ AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II,
+ VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Add scratch registers as implicit def and early clobber
+ if (ScratchRegs)
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
+ RegState::EarlyClobber);
+
+ // Set the memory reference descriptions of this instruction now that it is
+ // part of the function.
+ MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands());
+
+ // Insert the instruction into position in the block. This needs to
+ // happen before any custom inserter hook is called so that the
+ // hook knows where in the block to insert the replacement code.
+ MBB->insert(InsertPos, MIB);
+
+ // The MachineInstr may also define physregs instead of virtregs. These
+ // physreg values can reach other instructions in different ways:
+ //
+ // 1. When there is a use of a Node value beyond the explicitly defined
+ // virtual registers, we emit a CopyFromReg for one of the implicitly
+ // defined physregs. This only happens when HasPhysRegOuts is true.
+ //
+ // 2. A CopyFromReg reading a physreg may be glued to this instruction.
+ //
+ // 3. A glued instruction may implicitly use a physreg.
+ //
+ // 4. A glued instruction may use a RegisterSDNode operand.
+ //
+ // Collect all the used physreg defs, and make sure that any unused physreg
+ // defs are marked as dead.
+ SmallVector<Register, 8> UsedRegs;
+
+ // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = NumDefs; i < NumResults; ++i) {
+ Register Reg = II.getImplicitDefs()[i - NumDefs];
+ if (!Node->hasAnyUseOfValue(i))
+ continue;
+ // This implicitly defined physreg has a use.
+ UsedRegs.push_back(Reg);
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ }
+ }
+
+ // Scan the glue chain for any used physregs.
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ Register Reg = R->getReg();
+ if (Reg.isPhysical())
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
+ MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
+
+ // Run post-isel target hook to adjust this instruction if needed.
+ if (II.hasPostISelHook())
+ TLI->AdjustInstrPostInstrSelection(*MIB, Node);
+}
+
+/// EmitSpecialNode - Generate machine code for a target-independent node and
+/// needed dependencies.
+void InstrEmitter::
+EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump();
+#endif
+ llvm_unreachable("This target-independent node should have been selected!");
+ case ISD::EntryToken:
+ llvm_unreachable("EntryToken should have been excluded from the schedule!");
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ SDValue SrcVal = Node->getOperand(2);
+ if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() &&
+ SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Instead of building a COPY to that vreg destination, build an
+ // IMPLICIT_DEF instruction.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ break;
+ }
+ unsigned SrcReg;
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DestReg).addReg(SrcReg);
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::EH_LABEL:
+ case ISD::ANNOTATION_LABEL: {
+ unsigned Opc = (Node->getOpcode() == ISD::EH_LABEL)
+ ? TargetOpcode::EH_LABEL
+ : TargetOpcode::ANNOTATION_LABEL;
+ MCSymbol *S = cast<LabelSDNode>(Node)->getLabel();
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(Opc)).addSym(S);
+ break;
+ }
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
+ TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
+
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1));
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addFrameIndex(FI->getIndex());
+ break;
+ }
+
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ // Create the inline asm machine instruction.
+ unsigned TgtOpc = Node->getOpcode() == ISD::INLINEASM_BR
+ ? TargetOpcode::INLINEASM_BR
+ : TargetOpcode::INLINEASM;
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(TgtOpc));
+
+ // Add the asm string as an external symbol operand.
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+ const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
+ MIB.addExternalSymbol(AsmStr);
+
+ // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
+ // bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
+ getZExtValue();
+ MIB.addImm(ExtraInfo);
+
+ // Remember the operand index of the group flags.
+ SmallVector<unsigned, 8> GroupIdx;
+
+ // Remember registers that are part of early-clobber defs.
+ SmallVector<unsigned, 8> ECRegs;
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
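+ // Each operand group is one flags word followed by NumVals operands; the
+ // flags encode the group's kind (def, use, imm, mem, clobber) and count.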
+
+ GroupIdx.push_back(MIB->getNumOperands());
+ MIB.addImm(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MIB.addReg(Reg,
+ RegState::Define |
+ getImplRegState(Register::isPhysicalRegister(Reg)));
+ }
+ break;
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind_Clobber:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MIB.addReg(Reg,
+ RegState::Define | RegState::EarlyClobber |
+ getImplRegState(Register::isPhysicalRegister(Reg)));
+ ECRegs.push_back(Reg);
+ }
+ break;
+ case InlineAsm::Kind_RegUse: // Use of register.
+ case InlineAsm::Kind_Imm: // Immediate.
+ case InlineAsm::Kind_Mem: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (unsigned j = 0; j != NumVals; ++j, ++i)
+ AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Manually set isTied bits.
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
+ unsigned DefGroup = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+ unsigned DefIdx = GroupIdx[DefGroup] + 1;
+ unsigned UseIdx = GroupIdx.back() + 1;
+ for (unsigned j = 0; j != NumVals; ++j)
+ MIB->tieOperands(DefIdx + j, UseIdx + j);
+ }
+ }
+ break;
+ }
+ }
+
+ // GCC inline assembly allows input operands to also be early-clobber
+ // output operands (so long as the operand is written only after it's
+ // used), but this does not match the semantics of our early-clobber flag.
+ // If an early-clobber operand register is also an input operand register,
+ // then remove the early-clobber flag.
+ for (unsigned Reg : ECRegs) {
+ if (MIB->readsRegister(Reg, TRI)) {
+ MachineOperand *MO =
+ MIB->findRegisterDefOperand(Reg, false, false, TRI);
+ assert(MO && "No def operand for clobbered register?");
+ MO->setIsEarlyClobber(false);
+ }
+ }
+
+ // Get the mdnode from the asm if it exists and add it to the instruction.
+ SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+ const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+ if (MD)
+ MIB.addMetadata(MD);
+
+ MBB->insert(InsertPos, MIB);
+ break;
+ }
+ }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos)
+ : MF(mbb->getParent()), MRI(&MF->getRegInfo()),
+ TII(MF->getSubtarget().getInstrInfo()),
+ TRI(MF->getSubtarget().getRegisterInfo()),
+ TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
+ InsertPos(insertpos) {}
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 0000000000000..cfe99dd977b5b
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,142 @@
+//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+
+class MachineInstrBuilder;
+class MCInstrDesc;
+class SDDbgValue;
+
+class LLVM_LIBRARY_VISIBILITY InstrEmitter {
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertPos;
+
+ /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+ bool IsClone, bool IsCloned,
+ unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ void CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ unsigned getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// AddOperand - Add the specified operand to the specified machine instr. II
+ /// specifies the instruction information for the node, and IIOpNum is the
+ /// operand number (in the II) that we are adding. IIOpNum and II are used for
+ /// assertions only.
+ void AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// ConstrainForSubReg - Try to constrain VReg to a register class that
+ /// supports SubIdx sub-registers. Emit a copy if that isn't possible.
+ /// Return the virtual register to use.
+ unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
+ bool isDivergent, const DebugLoc &DL);
+
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+ /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+ /// register is constrained to be in a particular register class.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+ ///
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+public:
+ /// CountResults - The results of target nodes have register or immediate
+ /// operands first, then an optional chain, and optional flag operands
+  /// (which do not go into the machine instrs).
+ static unsigned CountResults(SDNode *Node);
+
+ /// EmitDbgValue - Generate machine instruction for a dbg_value node.
+ ///
+ MachineInstr *EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// Generate machine instruction for a dbg_label node.
+ MachineInstr *EmitDbgLabel(SDDbgLabel *SD);
+
+ /// EmitNode - Generate machine code for a node and needed dependencies.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Node->isMachineOpcode())
+ EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap);
+ else
+ EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap);
+ }
+
+ /// getBlock - Return the current basic block.
+ MachineBasicBlock *getBlock() { return MBB; }
+
+ /// getInsertPos - Return the current insertion position.
+ MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+ /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+ /// at the given position in the given block.
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+
+private:
+ void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+ void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 0000000000000..f9fdf525240fa
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,4646 @@
+//===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "legalizedag"
+
+namespace {
+
+/// Keeps track of state when getting the sign of a floating-point value as an
+/// integer.
+struct FloatSignAsInt {
+ EVT FloatVT;
+ SDValue Chain;
+ SDValue FloatPtr;
+ SDValue IntPtr;
+ MachinePointerInfo IntPointerInfo;
+ MachinePointerInfo FloatPointerInfo;
+ SDValue IntValue;
+ APInt SignMask;
+ uint8_t SignBit;
+};
+
+//===----------------------------------------------------------------------===//
+/// This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge the setcc and br instructions into brcc's.
+class SelectionDAGLegalize {
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+
+ /// The set of nodes which have already been legalized. We hold a
+ /// reference to it in order to update as necessary on node deletion.
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes;
+
+ /// A set of all the nodes updated during legalization.
+ SmallSetVector<SDNode *, 16> *UpdatedNodes;
+
+ EVT getSetCCResultType(EVT VT) const {
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ }
+
+ // Libcall insertion helpers.
+
+public:
+ SelectionDAGLegalize(SelectionDAG &DAG,
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes,
+ SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr)
+ : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG),
+ LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {}
+
+ /// Legalizes the given operation.
+ void LegalizeOp(SDNode *Node);
+
+private:
+ SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+ void LegalizeLoadOps(SDNode *Node);
+ void LegalizeStoreOps(SDNode *Node);
+
+ /// Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ const SDLoc &dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx,
+ const SDLoc &dl);
+
+  /// Return a vector shuffle operation which
+  /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, const SDLoc &dl,
+ SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const;
+
+ bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ bool &NeedInvert, const SDLoc &dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+ SDValue ExpandArgFPLibCall(SDNode *Node,
+ RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
+ const SDLoc &dl);
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
+ const SDLoc &dl, SDValue ChainIn);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSPLAT_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL,
+ SDValue Value) const;
+ SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL,
+ SDValue NewIntValue) const;
+ SDValue ExpandFCOPYSIGN(SDNode *Node) const;
+ SDValue ExpandFABS(SDNode *Node) const;
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT,
+ const SDLoc &dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
+ const SDLoc &dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
+ const SDLoc &dl);
+
+ SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
+ SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
+ SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+ SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+ SDValue ExpandConstant(ConstantSDNode *CP);
+
+ // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall
+ bool ExpandNode(SDNode *Node);
+ void ConvertNodeToLibcall(SDNode *Node);
+ void PromoteNode(SDNode *Node);
+
+public:
+ // Node replacement helpers
+
+ void ReplacedNode(SDNode *N) {
+ LegalizedNodes.erase(N);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(N);
+ }
+
+ void ReplaceNode(SDNode *Old, SDNode *New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ assert(Old->getNumValues() == New->getNumValues() &&
+ "Replacing one node with another that produces a different number "
+ "of values!");
+ DAG.ReplaceAllUsesWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New);
+ ReplacedNode(Old);
+ }
+
+ void ReplaceNode(SDValue Old, SDValue New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ DAG.ReplaceAllUsesWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New.getNode());
+ ReplacedNode(Old.getNode());
+ }
+
+ void ReplaceNode(SDNode *Old, const SDValue *New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
+
+ DAG.ReplaceAllUsesWith(Old, New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+ LLVM_DEBUG(dbgs() << (i == 0 ? " with: " : " and: ");
+ New[i]->dump(&DAG));
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New[i].getNode());
+ }
+ ReplacedNode(Old);
+ }
+
+ void ReplaceNodeWithValue(SDValue Old, SDValue New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ DAG.ReplaceAllUsesOfValueWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New.getNode());
+ ReplacedNode(Old.getNode());
+ }
+};
+
+} // end anonymous namespace
+
+/// Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
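+/// Each mask index Idx expands to NumEltsGrowth consecutive indices
+/// (Idx * NumEltsGrowth + j), and -1 sentinels widen to runs of -1; e.g. with
+/// a growth factor of 2, the mask <0, 3, -1, 1> becomes <0, 1, 6, 7, -1, -1, 2, 3>.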
+SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(
+ EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const {
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, NewMask);
+}
+
+/// Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+SDValue
+SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
+ bool Extend = false;
+ SDLoc dl(CFP);
+
+  // If an FP immediate is precise when represented as a float and if the
+  // target can do an extending load from float to double, we put it into
+  // the constant pool as a float, even if it is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
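+  // E.g. the double constant 1.0 is exactly representable as a float, so on
+  // such targets it is emitted as a 4-byte f32 constant-pool entry and widened
+  // back to f64 by an fp-extending load.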
+ EVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl,
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ APFloat APF = CFP->getValueAPF();
+ EVT OrigVT = VT;
+ EVT SVT = VT;
+
+ // We don't want to shrink SNaNs. Converting the SNaN back to its real type
+ // can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ).
+ if (!APF.isSignaling()) {
+ while (SVT != MVT::f32 && SVT != MVT::f16) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (ConstantFPSDNode::isValueValidForType(SVT, APF) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+ }
+
+ SDValue CPIdx =
+ DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend) {
+ SDValue Result = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT,
+ Alignment);
+ return Result;
+ }
+ SDValue Result = DAG.getLoad(
+ OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
+ return Result;
+}
+
+/// Expands the Constant node to a load from the constant pool.
+SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
+ SDLoc dl(CP);
+ EVT VT = CP->getValueType(0);
+ SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
+ TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue Result = DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
+ return Result;
+}
+
+/// Some targets cannot handle a variable insertion index for the
+/// INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
+ SDValue Val,
+ SDValue Idx,
+ const SDLoc &dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could also load the value into a vector register (either
+ // with a "move to register" or "extload into register" instruction, then
+ // permute it into place, if the idx is a constant and if the idx is
+ // supported by the target.
+ EVT VT = Tmp1.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(
+ DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+
+ SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
+
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), SPFI));
+}
+
+SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx,
+ const SDLoc &dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers in which case the inserted value can be over width.
+ EVT EltVT = Vec.getValueType().getVectorElementType();
+ if (Val.getValueType() == EltVT ||
+ (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+ SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ Vec.getValueType(), Val);
+
+ unsigned NumElts = Vec.getValueType().getVectorNumElements();
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask
+ // should be 0,1,2,3,4,5... with the appropriate element replaced with
+ // elt 0 of the RHS.
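+      // E.g. inserting into element 2 of a 4-element vector yields the mask
+      // <0, 1, 4, 3>, where index 4 (== NumElts) selects element 0 of ScVec.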
+ SmallVector<int, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps);
+ }
+ }
+ return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
+
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ LLVM_DEBUG(dbgs() << "Optimizing float store operations\n");
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ // We generally can't do this one for long doubles.
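+  // Concretely, 'store float 1.0, Ptr' becomes 'store i32 0x3F800000, Ptr',
+  // 0x3F800000 being the IEEE-754 single-precision bit pattern of 1.0.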
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+ SDLoc dl(ST);
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ TLI.isTypeLegal(MVT::i32)) {
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ SDLoc(CFP), MVT::i32);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment,
+ MMOFlags, AAInfo);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (TLI.isTypeLegal(MVT::i64)) {
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), SDLoc(CFP), MVT::i64);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
+ }
+
+ if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+ // stores. If the target supports neither 32- nor 64-bits, this
+ // xform is certainly not worth it.
+ const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment,
+ MMOFlags, AAInfo);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, dl, Ptr.getValueType()));
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(4),
+ MinAlign(Alignment, 4U), MMOFlags, AAInfo);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+ }
+ }
+ return SDValue(nullptr, 0);
+}
+
+void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDLoc dl(Node);
+
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ if (!ST->isTruncatingStore()) {
+ LLVM_DEBUG(dbgs() << "Legalizing store operation\n");
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ ReplaceNode(ST, OptStore);
+ return;
+ }
+
+ SDValue Value = ST->getValue();
+ MVT VT = Value.getSimpleValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ EVT MemVT = ST->getMemoryVT();
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
+ LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ } else
+ LLVM_DEBUG(dbgs() << "Legal store\n");
+ break;
+ }
+ case TargetLowering::Custom: {
+ LLVM_DEBUG(dbgs() << "Trying custom lowering\n");
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res && Res != SDValue(Node, 0))
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote stores to same size type");
+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n");
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ auto &DL = DAG.getDataLayout();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+ SDValue Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
+ Alignment, MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
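+    // (StWidth & (StWidth - 1)) is nonzero exactly when StWidth is not a
+    // power of two; e.g. an i24 truncstore becomes an i16 store plus an i8
+    // store of the remaining bits.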
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned LogStWidth = Log2_32(StWidth);
+ assert(LogStWidth < 32);
+ unsigned RoundWidth = 1 << LogStWidth;
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (DL.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ RoundVT, Alignment, MMOFlags, AAInfo);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
+ Hi = DAG.getTruncStore(
+ Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
+ RoundVT, Alignment, MMOFlags, AAInfo);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Lo = DAG.getTruncStore(
+ Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ }
+
+ // The order of the stores doesn't matter.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ EVT MemVT = ST->getMemoryVT();
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ }
+ break;
+ }
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res && Res != SDValue(Node, 0))
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Expand:
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
+
+ SDValue Result;
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ if (TLI.isTypeLegal(StVT)) {
+ Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+ Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
+ } else {
+ // The in-memory type isn't legal. Truncate to the type it would promote
+ // to, and then do a truncstore.
+ Value = DAG.getNode(ISD::TRUNCATE, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), StVT),
+ Value);
+ Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ StVT, Alignment, MMOFlags, AAInfo);
+ }
+
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+}
+
+void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDValue Chain = LD->getChain(); // The chain.
+ SDValue Ptr = LD->getBasePtr(); // The base pointer.
+ SDValue Value; // The value returned by the load op.
+ SDLoc dl(Node);
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ LLVM_DEBUG(dbgs() << "Legalizing non-extending load operation\n");
+ MVT VT = Node->getSimpleValueType(0);
+ SDValue RVal = SDValue(Node, 0);
+ SDValue RChain = SDValue(Node, 1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ EVT MemVT = LD->getMemoryVT();
+ const DataLayout &DL = DAG.getDataLayout();
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
+ std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
+ }
+ break;
+ }
+ case TargetLowering::Custom:
+ if (SDValue Res = TLI.LowerOperation(RVal, DAG)) {
+ RVal = Res;
+ RChain = Res.getValue(1);
+ }
+ break;
+
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote loads to same size type");
+
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand());
+ RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ RChain = Res.getValue(1);
+ break;
+ }
+ }
+ if (RChain.getNode() != Node) {
+ assert(RVal.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(RVal.getNode());
+ UpdatedNodes->insert(RChain.getNode());
+ }
+ ReplacedNode(Node);
+ }
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n");
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) ==
+ TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Value = Result;
+ Chain = Ch;
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned LogSrcWidth = Log2_32(SrcWidth);
+ assert(LogSrcWidth < 32);
+ unsigned RoundWidth = 1 << LogSrcWidth;
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+ auto &DL = DAG.getDataLayout();
+
+ if (DL.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, Alignment, MMOFlags,
+ AAInfo);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
+ AAInfo);
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(
+ ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth, dl,
+ TLI.getShiftAmountTy(Hi.getValueType(), DL)));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, Alignment, MMOFlags,
+ AAInfo);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
+ AAInfo);
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(
+ ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth, dl,
+ TLI.getShiftAmountTy(Hi.getValueType(), DL)));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Chain = Ch;
+ } else {
+ bool isCustom = false;
+ switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0),
+ SrcVT.getSimpleVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ LLVM_FALLTHROUGH;
+ case TargetLowering::Legal:
+ Value = SDValue(Node, 0);
+ Chain = SDValue(Node, 1);
+
+ if (isCustom) {
+ if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
+ Value = Res;
+ Chain = Res.getValue(1);
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ EVT MemVT = LD->getMemoryVT();
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
+ std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG);
+ }
+ }
+ break;
+
+ case TargetLowering::Expand: {
+ EVT DestVT = Node->getValueType(0);
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
+ // If the source type is not legal, see if there is a legal extload to
+ // an intermediate type that we can then extend further.
+ EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
+ if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
+ TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) {
+ // If we are loading a legal type, this is a non-extload followed by a
+ // full extend.
+ ISD::LoadExtType MidExtType =
+ (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType;
+
+ SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr,
+ SrcVT, LD->getMemOperand());
+ unsigned ExtendOp =
+ ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType);
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
+ break;
+ }
+
+ // Handle the special case of fp16 extloads. EXTLOAD doesn't have the
+ // normal undefined upper bits behavior to allow using an in-reg extend
+ // with the illegal FP type, so load as an integer and do the
+ // from-integer conversion.
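+      // E.g. an f16 -> f32 extload becomes a ZEXTLOAD of the underlying i16
+      // bits followed by an FP16_TO_FP node that produces the f32 result.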
+ if (SrcVT.getScalarType() == MVT::f16) {
+ EVT ISrcVT = SrcVT.changeTypeToInteger();
+ EVT IDestVT = DestVT.changeTypeToInteger();
+ EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+
+ SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain,
+ Ptr, ISrcVT, LD->getMemOperand());
+ Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
+
+ // FIXME: This does not work for vectors on most targets. Sign-
+ // and zero-extend operations are currently folded into extending
+ // loads, whether they are legal or not, and then we end up here
+ // without any support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an
+ // explicit zero/sign extend inreg.
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ Node->getValueType(0),
+ Chain, Ptr, SrcVT,
+ LD->getMemOperand());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Value = ValRes;
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ if (Chain.getNode() != Node) {
+ assert(Value.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(Value.getNode());
+ UpdatedNodes->insert(Chain.getNode());
+ }
+ ReplacedNode(Node);
+ }
+}
+
+/// Return a legal replacement for the given operation, with all legal operands.
+void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
+
+ // Allow illegal target nodes and illegal registers.
+ if (Node->getOpcode() == ISD::TargetConstant ||
+ Node->getOpcode() == ISD::Register)
+ return;
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
+ "Unexpected illegal type!");
+
+ for (const SDValue &Op : Node->op_values())
+ assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
+ TargetLowering::TypeLegal ||
+ Op.getOpcode() == ISD::TargetConstant ||
+ Op.getOpcode() == ISD::Register) &&
+ "Unexpected illegal type!");
+#endif
+
+  // Figure out the correct action; the way to query this varies by opcode.
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ bool SimpleFinishLegalizing = true;
+ switch (Node->getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::STACKSAVE:
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ break;
+ case ISD::VAARG:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ if (Action != TargetLowering::Promote)
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::FP_TO_FP16:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::LROUND:
+ case ISD::LLROUND:
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+ break;
+ }
+ case ISD::ATOMIC_STORE:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(2).getValueType());
+ break;
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::BR_CC: {
+ unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
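+    // These indices follow the operand layouts: SELECT_CC is
+    // (lhs, rhs, tval, fval, cc), SETCC is (lhs, rhs, cc), and BR_CC is
+    // (chain, cc, lhs, rhs, dest).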
+ MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal) {
+ if (Node->getOpcode() == ISD::SELECT_CC)
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ else
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ }
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ // FIXME: Model these properly. LOAD and STORE are complicated, and
+ // STORE expects the unlegalized operand in some cases.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ // FIXME: This shouldn't be necessary. These nodes have special properties
+ // dealing with the recursive nature of legalization. Removing this
+ // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ case ISD::FLT_ROUNDS_:
+ case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_DWARF_CFA:
+ case ISD::EH_SJLJ_SETJMP:
+ case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_SETUP_DISPATCH:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be expanded.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+ case ISD::INIT_TRAMPOLINE:
+ case ISD::ADJUST_TRAMPOLINE:
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::ADDROFRETURNADDR:
+ case ISD::SPONENTRY:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be custom-lowered.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Custom;
+ break;
+ case ISD::READCYCLECOUNTER:
+ // READCYCLECOUNTER returns an i64, even if type legalization might have
+ // expanded that to several smaller types.
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
+ break;
+ case ISD::READ_REGISTER:
+ case ISD::WRITE_REGISTER:
+    // A named register is legal in the DAG, but blocked by register-name
+    // selection if not implemented by the target (to choose the correct
+    // register). They'll be converted to Copy(To/From)Reg.
+ Action = TargetLowering::Legal;
+ break;
+ case ISD::DEBUGTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+ // replace ISD::DEBUGTRAP with ISD::TRAP
+ SDValue NewVal;
+ NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(),
+ Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These pseudo-ops are the same as the other STRICT_ ops except
+ // they are registered with setOperationAction() using the input type
+ // instead of the output type.
+ Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType());
+ break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
+ }
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: {
+ unsigned Scale = Node->getConstantOperandVal(2);
+ Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
+ Node->getValueType(0), Scale);
+ break;
+ }
+ case ISD::MSCATTER:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::MSTORE:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(), Node->getOperand(0).getValueType());
+ break;
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ Action = TargetLowering::Legal;
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ }
+ break;
+ }
+
+ if (SimpleFinishLegalizing) {
+ SDNode *NewNode = Node;
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!Op1.getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op1);
+ // The getShiftAmountOperand() may create a new operand node or
+        // return the existing one. If a new operand is created, we need
+ // to update the parent node.
+ // Do not try to legalize SAO here! It will be automatically legalized
+ // in the next round.
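+        // (When the target's preferred shift-amount type differs from the
+        // amount's type, getShiftAmountOperand() returns the amount
+        // zero-extended or truncated to that type, creating a new node.)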
+ if (SAO != Op1)
+ NewNode = DAG.UpdateNodeOperands(Node, Op0, SAO);
+ }
+ }
+ break;
+ case ISD::FSHL:
+ case ISD::FSHR:
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SHL_PARTS: {
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
+ if (!Op2.getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op2);
+ // The getShiftAmountOperand() may create a new operand node or
+        // return the existing one. If a new operand is created, we need
+ // to update the parent node.
+ if (SAO != Op2)
+ NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO);
+ }
+ break;
+ }
+ }
+
+ if (NewNode != Node) {
+ ReplaceNode(Node, NewNode);
+ Node = NewNode;
+ }
+ switch (Action) {
+ case TargetLowering::Legal:
+ LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
+ return;
+ case TargetLowering::Custom:
+ LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
+ // FIXME: The handling for custom lowering with multiple results is
+ // a complete mess.
+ if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
+ if (!(Res.getNode() != Node || Res.getResNo() != 0))
+ return;
+
+ if (Node->getNumValues() == 1) {
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
+ // We can just directly replace this node with the lowered value.
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+
+ SmallVector<SDValue, 8> ResultVals;
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ ResultVals.push_back(Res.getValue(i));
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
+ ReplaceNode(Node, ResultVals.data());
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
+ LLVM_FALLTHROUGH;
+ case TargetLowering::Expand:
+ if (ExpandNode(Node))
+ return;
+ LLVM_FALLTHROUGH;
+ case TargetLowering::LibCall:
+ ConvertNodeToLibcall(Node);
+ return;
+ case TargetLowering::Promote:
+ PromoteNode(Node);
+ return;
+ }
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "NODE: ";
+    Node->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to legalize this operator!");
+
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ break;
+ case ISD::LOAD:
+ return LegalizeLoadOps(Node);
+ case ISD::STORE:
+ return LegalizeStoreOps(Node);
+ }
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ SDLoc dl(Op);
+
+ // Before we generate a new store to a temporary stack slot, see if there is
+ // already one that we can use. There often is because when we scalarize
+ // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole
+ // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in
+ // the vector. If all are expanded here, we don't want one store per vector
+ // element.
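+  // Reusing a store is only safe if it writes exactly this vector, nothing
+  // else can write the slot in between, and rewiring the chain cannot create
+  // a cycle; the checks in the loop below enforce each of these in turn.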
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Op.getNode());
+ Worklist.push_back(Idx.getNode());
+ SDValue StackPtr, Ch;
+ for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
+ UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {
+ if (ST->isIndexed() || ST->isTruncatingStore() ||
+ ST->getValue() != Vec)
+ continue;
+
+ // Make sure that nothing else could have stored into the destination of
+ // this store.
+ if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode()))
+ continue;
+
+ // If the index is dependent on the store we will introduce a cycle when
+ // creating the load (the load uses the index, and by replacing the chain
+ // we will make the index dependent on the load). Also, the store might be
+ // dependent on the extractelement and introduce a cycle when creating
+ // the load.
+ if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
+ ST->hasPredecessor(Op.getNode()))
+ continue;
+
+ StackPtr = ST->getBasePtr();
+ Ch = SDValue(ST, 0);
+ break;
+ }
+ }
+
+ EVT VecVT = Vec.getValueType();
+
+ if (!Ch.getNode()) {
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ StackPtr = DAG.CreateStackTemporary(VecVT);
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo());
+ }
+
+ StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+
+ SDValue NewLoad;
+
+ if (Op.getValueType().isVector())
+ NewLoad =
+ DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo());
+ else
+ NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ VecVT.getVectorElementType());
+
+ // Replace the chain going out of the store, by the one out of the load.
+ DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
+
+  // We introduced a cycle though, so update the load's operands, making sure
+ // to use the original store's chain as an incoming chain.
+ SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(),
+ NewLoad->op_end());
+ NewLoadOperands[0] = Ch;
+ NewLoad =
+ SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0);
+ return NewLoad;
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ SDLoc dl(Op);
+
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ EVT VecVT = Vec.getValueType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+
+  // Then compute the address of the inserted part.
+ SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+
+ // Store the subvector.
+ Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo());
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
+}
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ // We can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
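+  // E.g. a BUILD_VECTOR of four i32 elements becomes four i32 stores at stack
+  // offsets 0, 4, 8 and 12 followed by a single v4i32 load of the whole slot.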
+ // Create the stack frame object.
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(Node);
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).isUndef()) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+ // If the destination vector element type is narrower than the source
+ // element type, only store the bits necessary.
+ if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset), EltVT));
+ } else
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
+ Idx, PtrInfo.getWithOffset(Offset)));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo);
+}
+
+/// Bitcast a floating-point value to an integer value. Only bitcast the part
+/// containing the sign bit if the target has no integer value capable of
+/// holding all bits of the floating-point value.
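+/// For example, if no legal integer type is as wide as the float, the value is
+/// spilled to the stack and only the byte containing the sign bit is reloaded
+/// (the highest-addressed byte on little-endian targets, the lowest on
+/// big-endian ones), with the sign at bit 7 of that byte.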
+void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
+ const SDLoc &DL,
+ SDValue Value) const {
+ EVT FloatVT = Value.getValueType();
+ unsigned NumBits = FloatVT.getSizeInBits();
+ State.FloatVT = FloatVT;
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+ // Convert to an integer of the same size.
+ if (TLI.isTypeLegal(IVT)) {
+ State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
+ State.SignMask = APInt::getSignMask(NumBits);
+ State.SignBit = NumBits - 1;
+ return;
+ }
+
+ auto &DataLayout = DAG.getDataLayout();
+ // Store the float to memory, then load the sign part out as an integer.
+ MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8);
+ // First create a temporary that is aligned for both the load and store.
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ // Then store the float to it.
+ State.FloatPtr = StackPtr;
+ MachineFunction &MF = DAG.getMachineFunction();
+ State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr,
+ State.FloatPointerInfo);
+
+ SDValue IntPtr;
+ if (DataLayout.isBigEndian()) {
+ assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+ // Load out a legal integer with the same sign bit as the float.
+ IntPtr = StackPtr;
+ State.IntPointerInfo = State.FloatPointerInfo;
+ } else {
+ // Advance the pointer so that the loaded byte will contain the sign bit.
+ unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1;
+ IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(ByteOffset, DL, StackPtr.getValueType()));
+ State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
+ ByteOffset);
+ }
+
+ State.IntPtr = IntPtr;
+ State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr,
+ State.IntPointerInfo, MVT::i8);
+ State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
+ State.SignBit = 7;
+}
+
+/// Replace the integer value produced by getSignAsIntValue() with a new value
+/// and cast the result back to a floating-point type.
+SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State,
+ const SDLoc &DL,
+ SDValue NewIntValue) const {
+ if (!State.Chain)
+ return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue);
+
+ // Override the part containing the sign bit in the value stored on the stack.
+ SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr,
+ State.IntPointerInfo, MVT::i8);
+ return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr,
+ State.FloatPointerInfo);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Mag = Node->getOperand(0);
+ SDValue Sign = Node->getOperand(1);
+
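+ // Conceptually this computes result = (Mag & ~SignMask) | (Sign & SignMask)
+ // on the values' bit patterns. E.g. for f32, SignMask is 0x80000000, so
+ // copysign(1.0f, -2.0f) is
+ //   (0x3F800000 & 0x7FFFFFFF) | (0xC0000000 & 0x80000000) = 0xBF800000,
+ // i.e. -1.0f. (The code below also handles mismatched operand widths.)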
+ // Get sign bit into an integer value.
+ FloatSignAsInt SignAsInt;
+ getSignAsIntValue(SignAsInt, DL, Sign);
+
+ EVT IntVT = SignAsInt.IntValue.getValueType();
+ SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue,
+ SignMask);
+
+ // If FABS and FNEG are legal, transform
+ // FCOPYSIGN(x, y) => sign(y) ? -FABS(x) : FABS(x)
+ EVT FloatVT = Mag.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) {
+ SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag);
+ SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue);
+ SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit,
+ DAG.getConstant(0, DL, IntVT), ISD::SETNE);
+ return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue);
+ }
+
+ // Transform Mag value to integer, and clear the sign bit.
+ FloatSignAsInt MagAsInt;
+ getSignAsIntValue(MagAsInt, DL, Mag);
+ EVT MagVT = MagAsInt.IntValue.getValueType();
+ SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue,
+ ClearSignMask);
+
+ // Get the sign bit at the right position for MagAsInt.
+ int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
+ EVT ShiftVT = IntVT;
+ if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
+ SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
+ ShiftVT = MagVT;
+ }
+ if (ShiftAmount > 0) {
+ SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, ShiftVT);
+ SignBit = DAG.getNode(ISD::SRL, DL, ShiftVT, SignBit, ShiftCnst);
+ } else if (ShiftAmount < 0) {
+ SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT);
+ SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst);
+ }
+ if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
+ SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
+ }
+
+ // Store the part with the modified sign and convert back to float.
+ SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit);
+ return modifySignAsInt(MagAsInt, DL, CopiedSign);
+}
+
+SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Value = Node->getOperand(0);
+
+ // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal.
+ EVT FloatVT = Value.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) {
+ SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT);
+ return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero);
+ }
+
+ // Transform value to integer, clear the sign bit and transform back.
+ FloatSignAsInt ValueAsInt;
+ getSignAsIntValue(ValueAsInt, DL, Value);
+ EVT IntVT = ValueAsInt.IntValue.getValueType();
+ SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue,
+ ClearSignMask);
+ return modifySignAsInt(ValueAsInt, DL, ClearedSign);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ unsigned StackAlign =
+ DAG.getSubtarget().getFrameLowering()->getStackAlignment();
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ if (Align > StackAlign)
+ Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+ DAG.getConstant(-(uint64_t)Align, dl, VT));
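+ // E.g. with Align == 32 this computes (SP - Size) & ~31, rounding the new
+ // stack pointer down to a 32-byte boundary (the stack grows down).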
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// Legalize a SETCC with the given LHS and RHS and condition code CC on the
+/// current target.
+///
+/// If the SETCC has been legalized using AND / OR, then the legalized node
+/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
+/// will be set to false.
+///
+/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
+/// then the values of LHS and RHS will be swapped, CC will be set to the
+/// new condition, and NeedInvert will be set to false.
+///
+/// If the SETCC has been legalized using the inverse condcode, then LHS and
+/// RHS will be unchanged, CC will be set to the inverted condcode, and NeedInvert
+/// will be set to true. The caller must invert the result of the SETCC with
+/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect
+/// of a true/false result.
+///
+/// \returns true if the SetCC has been legalized, false if it hasn't.
+bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
+ SDValue &RHS, SDValue &CC,
+ bool &NeedInvert,
+ const SDLoc &dl) {
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ NeedInvert = false;
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default: llvm_unreachable("Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(InvCC);
+ return true;
+ }
+ // Swapping operands didn't work. Try inverting the condition.
+ bool NeedSwap = false;
+ InvCC = ISD::getSetCCInverse(CCCode, OpVT.isInteger());
+ if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ // If inverting the condition is not enough, try swapping operands
+ // on top of it.
+ InvCC = ISD::getSetCCSwappedOperands(InvCC);
+ NeedSwap = true;
+ }
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ if (NeedSwap)
+ std::swap(LHS, RHS);
+ return true;
+ }
+
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default: llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETO:
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
+ && "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+ case ISD::SETUO:
+ assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT)
+ && "If SETUO is expanded, SETUNE must be legal!");
+ CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break, otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
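+ // E.g. SETUGT (bit 0x8 set) expands to (LHS SETGT RHS) OR (LHS SETUO RHS),
+ // while SETOLE expands to (LHS SETLE RHS) AND (LHS SETO RHS).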
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fall through if this is an unsigned integer comparison.
+ LLVM_FALLTHROUGH;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ // If all combinations of inverting the condition and swapping operands
+ // didn't work then we have no means to expand the condition.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unordered operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+ }
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
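+/// For example, an FP_ROUND f64 -> f32 can be emitted as a truncating f32
+/// store of the f64 value followed by an f32 load of the same slot.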
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+ EVT DestVT, const SDLoc &dl) {
+ return EmitStackConvert(SrcOp, SlotVT, DestVT, dl, DAG.getEntryNode());
+}
+
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+ EVT DestVT, const SDLoc &dl,
+ SDValue Chain) {
+ // Create the stack frame object.
+ unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
+ SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+ unsigned SrcSize = SrcOp.getValueSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType);
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // wider than the slot type.
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
+ SlotVT, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store =
+ DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
+ DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ SDLoc dl(Node);
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(
+ DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI),
+ Node->getValueType(0).getVectorElementType());
+ return DAG.getLoad(
+ Node->getValueType(0), dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+}
+
+static bool
+ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDValue &Res) {
+ unsigned NumElems = Node->getNumOperands();
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+
+ // Try to group the scalars into pairs, shuffle the pairs together, then
+ // shuffle the pairs of pairs together, etc. until the vector has
+ // been built. This will work only if all of the necessary shuffle masks
+ // are legal.
+
+ // We do this in two phases; first to check the legality of the shuffles,
+ // and next, assuming that all shuffles are legal, to create the new nodes.
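+ // For example, <a, b, c, d> becomes four SCALAR_TO_VECTOR nodes; the first
+ // two are shuffled into <a, b, u, u>, the last two into <c, d, u, u>, and
+ // those two results are shuffled into the final <a, b, c, d>.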
+ for (int Phase = 0; Phase < 2; ++Phase) {
+ SmallVector<std::pair<SDValue, SmallVector<int, 16>>, 16> IntermedVals,
+ NewIntermedVals;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.isUndef())
+ continue;
+
+ SDValue Vec;
+ if (Phase)
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V);
+ IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i)));
+ }
+
+ while (IntermedVals.size() > 2) {
+ NewIntermedVals.clear();
+ for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) {
+ // This vector and the next vector are shuffled together (simply to
+ // append one to the other).
+ SmallVector<int, 16> ShuffleVec(NumElems, -1);
+
+ SmallVector<int, 16> FinalIndices;
+ FinalIndices.reserve(IntermedVals[i].second.size() +
+ IntermedVals[i+1].second.size());
+
+ int k = 0;
+ for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f;
+ ++j, ++k) {
+ ShuffleVec[k] = j;
+ FinalIndices.push_back(IntermedVals[i].second[j]);
+ }
+ for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f;
+ ++j, ++k) {
+ ShuffleVec[k] = NumElems + j;
+ FinalIndices.push_back(IntermedVals[i+1].second[j]);
+ }
+
+ SDValue Shuffle;
+ if (Phase)
+ Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first,
+ IntermedVals[i+1].first,
+ ShuffleVec);
+ else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
+ return false;
+ NewIntermedVals.push_back(
+ std::make_pair(Shuffle, std::move(FinalIndices)));
+ }
+
+ // If we had an odd number of defined values, then append the last
+ // element to the array of new vectors.
+ if ((IntermedVals.size() & 1) != 0)
+ NewIntermedVals.push_back(IntermedVals.back());
+
+ IntermedVals.swap(NewIntermedVals);
+ }
+
+ assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 &&
+ "Invalid number of intermediate vectors");
+ SDValue Vec1 = IntermedVals[0].first;
+ SDValue Vec2;
+ if (IntermedVals.size() > 1)
+ Vec2 = IntermedVals[1].first;
+ else if (Phase)
+ Vec2 = DAG.getUNDEF(VT);
+
+ SmallVector<int, 16> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i)
+ ShuffleVec[IntermedVals[0].second[i]] = i;
+ for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i)
+ ShuffleVec[IntermedVals[1].second[i]] = NumElems + i;
+
+ if (Phase)
+ Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec);
+ else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
+ return false;
+ }
+
+ return true;
+}
+
+/// Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue Value1, Value2;
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+ bool MoreThanTwoValues = false;
+ bool isConstant = true;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.isUndef())
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value1.getNode()) {
+ Value1 = V;
+ } else if (!Value2.getNode()) {
+ if (V != Value1)
+ Value2 = V;
+ } else if (V != Value1 && V != Value2) {
+ MoreThanTwoValues = true;
+ }
+ }
+
+ if (!Value1.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ SmallVector<Constant*, 16> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ if (OpVT==EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
+ } else {
+ assert(Node->getOperand(i).isUndef());
+ Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx =
+ DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ return DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
+ }
+
+ SmallSet<SDValue, 16> DefinedValues;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Node->getOperand(i).isUndef())
+ continue;
+ DefinedValues.insert(Node->getOperand(i));
+ }
+
+ if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) {
+ if (!MoreThanTwoValues) {
+ SmallVector<int, 8> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.isUndef())
+ continue;
+ ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+ }
+ if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+ SDValue Vec2;
+ if (Value2.getNode())
+ Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+ else
+ Vec2 = DAG.getUNDEF(VT);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec);
+ }
+ } else {
+ SDValue Res;
+ if (ExpandBVWithShuffles(Node, DAG, TLI, Res))
+ return Res;
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently.
+ return ExpandVectorBuildThroughStack(Node);
+}
+
+SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue SplatVal = Node->getOperand(0);
+
+ return DAG.getSplatBuildVector(VT, DL, SplatVal);
+}
+
+// Expand a node into a call to a libcall, returning the value produced by
+// the call. If the libcall is folded into a tail call, the chain (the new
+// DAG root) is returned instead.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : Node->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
+ Entry.IsZExt = !TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // TLI.isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ // The call may only be emitted as a tail call if the callee does not
+ // reference the caller's stack frame. Check that it is in tail call
+ // position and that the return types match.
+ SDValue TCChain = InChain;
+ const Function &F = DAG.getMachineFunction().getFunction();
+ bool isTailCall =
+ TLI.isInTailCallPosition(DAG, Node, TCChain) &&
+ (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
+ if (isTailCall)
+ InChain = TCChain;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setTailCall(isTailCall)
+ .setSExtResult(signExtend)
+ .setZExtResult(!signExtend)
+ .setIsPostTypeLegalization(true);
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ if (!CallInfo.second.getNode()) {
+ LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG));
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+ }
+
+ LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG));
+ return CallInfo.first;
+}
+
+// Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ if (Node->isStrictFPOpcode())
+ Node = DAG.mutateStrictFPToFP(Node);
+
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::f128: LC = Call_F128; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+ return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
+ case MVT::i128: LC = Call_I128; break;
+ }
+ return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// Expand the node to a libcall based on the first argument type (for
+/// instance lround and its variants).
+SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ if (Node->isStrictFPOpcode())
+ Node = DAG.mutateStrictFPToFP(Node);
+
+ RTLIB::Libcall LC;
+ switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::f128: LC = Call_F128; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+
+ return ExpandLibCall(LC, Node, false);
+}
+
+/// Issue libcalls to __{u}divmod to compute div / rem pairs.
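+/// The quotient is returned directly and the remainder is written through
+/// an extra pointer argument backed by a stack slot, in the style of
+/// compiler-rt's __divmodsi4(a, b, &rem).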
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependency chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : Node->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the return address of the remainder.
+ SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = FIPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ SDLoc dl(Node);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ // Remainder is loaded back from the stack frame.
+ SDValue Rem =
+ DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo());
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
+}
+
+/// Return true if sincos libcall is available.
+static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+ return TLI.getLibcallName(LC) != nullptr;
+}
+
+/// Only issue the sincos libcall if both sin and cos are needed.
+static bool useSinCos(SDNode *Node) {
+ unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
+ ? ISD::FCOS : ISD::FSIN;
+
+ SDValue Op0 = Node->getOperand(0);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ // The other user might have been turned into sincos already.
+ if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS)
+ return true;
+ }
+ return false;
+}
+
+/// Issue libcalls to sincos to compute sin / cos pairs.
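+/// Both results are returned through pointer arguments backed by stack
+/// slots, matching the usual void sincos(x, &sin, &cos) interface.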
+void
+SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependency chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // Pass the argument.
+ Entry.Node = Node->getOperand(0);
+ Entry.Ty = RetTy;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+
+ // Pass the return address of sin.
+ SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = SinPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+
+ // Also pass the return address of the cos.
+ SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = CosPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ SDLoc dl(Node);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain).setLibCallee(
+ TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee,
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ Results.push_back(
+ DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, MachinePointerInfo()));
+ Results.push_back(
+ DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo()));
+}
+
+/// This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
+ EVT DestVT,
+ const SDLoc &dl) {
+ EVT SrcVT = Op0.getValueType();
+
+ // TODO: Should any fast-math-flags be set for the created nodes?
+ LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
+ if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+ LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
+ "expansion\n");
+
+ // Get the stack frame index of an 8-byte buffer.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+ // Word offset constant for the Hi/Lo address computation.
+ SDValue WordOff = DAG.getConstant(sizeof(int), dl,
+ StackSlot.getValueType());
+ // Set up the Hi and Lo (into-buffer) addresses based on endianness.
+ SDValue Hi = StackSlot;
+ SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(),
+ StackSlot, WordOff);
+ if (DAG.getDataLayout().isLittleEndian())
+ std::swap(Hi, Lo);
+
+ // If signed, map to unsigned space.
+ SDValue Op0Mapped;
+ if (isSigned) {
+ // Constant used to invert the sign bit (signed-to-unsigned mapping).
+ SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32);
+ Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+ } else {
+ Op0Mapped = Op0;
+ }
+ // Store the lo of the constructed double, taken from the integer input.
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo,
+ MachinePointerInfo());
+ // Initial hi portion of the constructed double.
+ SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32);
+ // Store the hi of the constructed double: the biased exponent.
+ SDValue Store2 =
+ DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo());
+ // Load the constructed double.
+ SDValue Load =
+ DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo());
+ // FP constant to bias-correct the final result.
+ SDValue Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL) :
+ BitsToDouble(0x4330000000000000ULL),
+ dl, MVT::f64);
+ // Subtract the bias.
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ // Round or extend to the final result type.
+ SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
+ return Result;
+ }
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ // Code below here assumes !isSigned without checking again.
+
+ SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
+ DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+ SDValue Zero = DAG.getIntPtrConstant(0, dl),
+ Four = DAG.getIntPtrConstant(4, dl);
+ SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
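+ // E.g. for i32 0xFFFFFFFF: SINT_TO_FP sees -1 and produces -1.0, and
+ // adding the selected offset 2^32 yields the required 4294967295.0.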
+ uint64_t FF;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported integer type!");
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
+ if (DAG.getDataLayout().isLittleEndian())
+ FF <<= 32;
+ Constant *FudgeFactor = ConstantInt::get(
+ Type::getInt64Ty(*DAG.getContext()), FF);
+
+ SDValue CPIdx =
+ DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset);
+ Alignment = std::min(Alignment, 4u);
+ SDValue FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(
+ MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
+ else {
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ Alignment);
+ HandleSDNode Handle(Load);
+ LegalizeOp(Load.getNode());
+ FudgeInReg = Handle.getValue();
+ }
+
+ return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
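+/// For example, an i16 UINT_TO_FP can be promoted to an i32 SINT_TO_FP of
+/// the zero-extended input: the extended value is always non-negative, so
+/// the signed conversion is exact.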
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
+ bool isSigned,
+ const SDLoc &dl) {
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ EVT NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (true) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
+ assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::SINT_TO_FP;
+ break;
+ }
+ if (isSigned) continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::UINT_TO_FP;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use. Zero extend our input to the
+ // desired type then run the operation on it.
+ return DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp));
+}
+
+/// This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
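+/// For example, an f32 FP_TO_UINT producing i16 can be promoted to an
+/// FP_TO_SINT producing i32: every u16 value fits in the non-negative range
+/// of i32, and the result is truncated back down to i16.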
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
+ bool isSigned,
+ const SDLoc &dl) {
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ EVT NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (true) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ // A larger signed type can hold all unsigned values of the requested type,
+ // so using FP_TO_SINT is valid.
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_SINT;
+ break;
+ }
+
+ // A wider FP_TO_UINT also works, but only for unsigned conversions: if the
+ // value may be < 0.0, we *must* use some FP_TO_SINT.
+ if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_UINT;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use.
+ SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+ // Truncate the result of the extended FP_TO_*INT operation to the desired
+ // size.
+ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts.
+SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ SDValue Tmp, Tmp2, Tmp3;
+
+ // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
+ // pairs, and finally the i1 pairs.
+ // TODO: We can easily support i4/i2 legal types if any target ever does.
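+ // E.g. for the i8 value 0x1E (0b00011110): the i4 swap gives 0xE1, the i2
+ // swap gives 0xB4, and the i1 swap gives 0x78 (0b01111000), which is 0x1E
+ // bit-reversed.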
+ if (Sz >= 8 && isPowerOf2_32(Sz)) {
+ // Create the masks - repeating the pattern every byte.
+ APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
+ APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
+ APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
+ APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
+
+ // BSWAP if the type is wider than a single byte.
+ Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
+
+ // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+
+ // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+
+ // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ return Tmp;
+ }
+
+ Tmp = DAG.getConstant(0, dl, VT);
+ for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
+ if (I < J)
+ Tmp2 =
+ DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
+ else
+ Tmp2 =
+ DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
+
+ APInt Shift(Sz, 1);
+ Shift <<= J;
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
+ }
+
+ return Tmp;
+}
+
+/// Open-code the operations for a BSWAP of the specified value.
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().getScalarType().SimpleTy) {
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
+ case MVT::i16:
+ // Use a rotate by 8. This can be further expanded if necessary.
+ return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ case MVT::i32:
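+ // E.g. Op = 0x12345678: Tmp4 = 0x78000000, Tmp3 = 0x00560000 and
+ // Tmp2 = 0x00003400 after masking, Tmp1 = 0x00000012; OR'ing them together
+ // yields the byte-swapped value 0x78563412.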
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+ DAG.getConstant(0xFF0000, dl, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
+ DAG.getConstant(255ULL<<48, dl, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
+ DAG.getConstant(255ULL<<40, dl, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
+ DAG.getConstant(255ULL<<32, dl, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
+ DAG.getConstant(255ULL<<24, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+ DAG.getConstant(255ULL<<16, dl, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
+ DAG.getConstant(255ULL<<8 , dl, VT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
+
+bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ LLVM_DEBUG(dbgs() << "Trying to expand node\n");
+ SmallVector<SDValue, 8> Results;
+ SDLoc dl(Node);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ bool NeedInvert;
+ switch (Node->getOpcode()) {
+ case ISD::ABS:
+ if (TLI.expandABS(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::CTPOP:
+ if (TLI.expandCTPOP(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ if (TLI.expandCTLZ(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ if (TLI.expandCTTZ(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BITREVERSE:
+ Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl));
+ break;
+ case ISD::BSWAP:
+ Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
+ break;
+ case ISD::EH_DWARF_CFA: {
+ SDValue CfaArg = DAG.getSExtOrTrunc(Node->getOperand(0), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ CfaArg.getValueType(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ CfaArg.getValueType()),
+ CfaArg);
+ SDValue FA = DAG.getNode(
+ ISD::FRAMEADDR, dl, TLI.getPointerTy(DAG.getDataLayout()),
+ DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, FA.getValueType(),
+ FA, Offset));
+ break;
+ }
+ case ISD::FLT_ROUNDS_:
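+ // Without target support, assume round-to-nearest (FLT_ROUNDS encoding 1).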
+ Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
+ break;
+ case ISD::EH_RETURN:
+ case ISD::EH_LABEL:
+ case ISD::PREFETCH:
+ case ISD::VAEND:
+ case ISD::EH_SJLJ_LONGJMP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::READCYCLECOUNTER:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.append(Node->getNumValues() - 1,
+ DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.push_back(DAG.getConstant(0, dl, MVT::i32));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::ATOMIC_LOAD: {
+ // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
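+ // A compare-and-swap of 0 with 0 returns the current value and leaves
+ // memory unchanged (0 is only stored where the value was already 0).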
+ SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0));
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+ Node->getOperand(0), Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ // There is no libcall for atomic store; fake it with ATOMIC_SWAP.
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand());
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+ // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and
+ // splits out the success value as a comparison. Expanding the resulting
+ // ATOMIC_CMP_SWAP will produce a libcall.
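+ // The success flag is then recomputed as (old value == expected), with
+ // both sides normalized below according to the target's atomic extension
+ // kind.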
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+ Node->getOperand(0), Node->getOperand(1), Node->getOperand(2),
+ Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand());
+
+ SDValue ExtRes = Res;
+ SDValue LHS = Res;
+ SDValue RHS = Node->getOperand(1);
+
+ EVT AtomicType = cast<AtomicSDNode>(Node)->getMemoryVT();
+ EVT OuterType = Node->getValueType(0);
+ switch (TLI.getExtendForAtomicOps()) {
+ case ISD::SIGN_EXTEND:
+ LHS = DAG.getNode(ISD::AssertSext, dl, OuterType, Res,
+ DAG.getValueType(AtomicType));
+ RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OuterType,
+ Node->getOperand(2), DAG.getValueType(AtomicType));
+ ExtRes = LHS;
+ break;
+ case ISD::ZERO_EXTEND:
+ LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
+ DAG.getValueType(AtomicType));
+ RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
+ ExtRes = LHS;
+ break;
+ case ISD::ANY_EXTEND:
+ LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
+ RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
+
+ SDValue Success =
+ DAG.getSetCC(dl, Node->getValueType(1), LHS, RHS, ISD::SETEQ);
+
+ Results.push_back(ExtRes.getValue(0));
+ Results.push_back(Success);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
+ case ISD::DYNAMIC_STACKALLOC:
+ ExpandDYNAMIC_STACKALLOC(Node, Results);
+ break;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0; i < Node->getNumValues(); i++)
+ Results.push_back(Node->getOperand(i));
+ break;
+ case ISD::UNDEF: {
+ EVT VT = Node->getValueType(0);
+ if (VT.isInteger())
+ Results.push_back(DAG.getConstant(0, dl, VT));
+ else {
+ assert(VT.isFloatingPoint() && "Unknown value type!");
+ Results.push_back(DAG.getConstantFP(0, dl, VT));
+ }
+ break;
+ }
+ case ISD::STRICT_FP_ROUND:
+ // This expansion does not honor the "strict" properties anyway,
+ // so prefer falling back to the non-strict operation if legal.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ break;
+ Tmp1 = EmitStackConvert(Node->getOperand(1),
+ Node->getValueType(0),
+ Node->getValueType(0), dl, Node->getOperand(0));
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
+ return true;
+ case ISD::FP_ROUND:
+ case ISD::BITCAST:
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getValueType(0),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::STRICT_FP_EXTEND:
+ // This expansion does not honor the "strict" properties anyway,
+ // so prefer falling back to the non-strict operation if legal.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ break;
+ Tmp1 = EmitStackConvert(Node->getOperand(1),
+ Node->getOperand(1).getValueType(),
+ Node->getValueType(0), dl, Node->getOperand(0));
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
+ return true;
+ case ISD::FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+
+ // An in-register sign-extend of a boolean is a negation:
+ // 'true' (1) sign-extended is -1.
+ // 'false' (0) sign-extended is 0.
+ // However, we must mask the high bits of the source operand because the
+ // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero.
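+ // The expansion therefore computes 0 - (Op & 1); e.g. for i32 Op = 3 the
+ // result is 0 - 1 = 0xFFFFFFFF, the sign-extension of the i1 'true'.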
+
+ // TODO: Do this for vectors too?
+ if (ExtraVT.getSizeInBits() == 1) {
+ SDValue One = DAG.getConstant(1, dl, VT);
+ SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, Zero, And);
+ Results.push_back(Neg);
+ break;
+ }
+
+ // NOTE: we could fall back on load/store here too for targets without
+ // SRA. However, it is doubtful that any exist.
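+ // E.g. for ExtraVT = i8 in VT = i32, BitsDiff = 24: shifting left by 24
+ // and then arithmetic-shifting right by 24 replicates bit 7 across the
+ // high 24 bits.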
+ EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned BitsDiff = VT.getScalarSizeInBits() -
+ ExtraVT.getScalarSizeInBits();
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy);
+ Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::UINT_TO_FP:
+ if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
+ Results.push_back(Tmp1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+ Node->getOperand(0), Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_TO_SINT:
+ if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::STRICT_FP_TO_SINT:
+ if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n");
+ return true;
+ }
+ break;
+ case ISD::FP_TO_UINT:
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::STRICT_FP_TO_UINT:
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) {
+ // Relink the chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2);
+ // Replace the new UINT result.
+ ReplaceNodeWithValue(SDValue(Node, 0), Tmp1);
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n");
+ return true;
+ }
+ break;
+ case ISD::VAARG:
+ Results.push_back(DAG.expandVAArg(Node));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ case ISD::VACOPY:
+ Results.push_back(DAG.expandVACopy(Node));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ // This must be an access of the only element. Return it.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ else
+ Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::CONCAT_VECTORS:
+ Results.push_back(ExpandVectorBuildThroughStack(Node));
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getOperand(2), dl));
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 32> NewMask;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!TLI.isTypeLegal(EltVT)) {
+ EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+ // BUILD_VECTOR operands are allowed to be wider than the element type.
+ // But if NewEltVT is smaller than EltVT, the BUILD_VECTOR does not accept
+ // it.
+ if (NewEltVT.bitsLT(EltVT)) {
+ // Convert shuffle node.
+ // If original node was v4i64 and the new EltVT is i32,
+ // cast operands to v8i32 and re-build the mask.
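+ // E.g. with factor 2, the v4i64 mask <1, 0, 3, 2> becomes the v8i32 mask
+ // <2, 3, 0, 1, 6, 7, 4, 5>.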
+
+ // Calculate the new VT; its size should equal the original VT's size.
+ EVT NewVT =
+ EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits() / NewEltVT.getSizeInBits());
+ assert(NewVT.bitsEq(VT));
+
+ // Cast the operands to the new VT.
+ Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+ // Convert the shuffle mask
+ unsigned int factor =
+ NewVT.getVectorNumElements()/VT.getVectorNumElements();
+
+ // EltVT gets smaller
+ assert(factor > 0);
+
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0) {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]);
+ }
+ else {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]*factor+fi);
+ }
+ }
+ Mask = NewMask;
+ VT = NewVT;
+ }
+ EltVT = NewEltVT;
+ }
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ unsigned Idx = Mask[i];
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ else
+ Ops.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1,
+ DAG.getConstant(Idx - NumElems, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ }
+
+ Tmp1 = DAG.getBuildVector(VT, dl, Ops);
+ // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ EVT OpTy = Node->getOperand(0).getValueType();
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ // 1 -> Hi
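+ // E.g. extracting the Hi half of an i64 as i32: SRL the i64 by 32, then
+ // TRUNCATE to i32.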
+ Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+ DAG.getConstant(OpTy.getSizeInBits() / 2, dl,
+ TLI.getShiftAmountTy(
+ Node->getOperand(0).getValueType(),
+ DAG.getDataLayout())));
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+ } else {
+ // 0 -> Lo
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::STACKSAVE:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ } else {
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::STACKRESTORE:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+ Node->getOperand(1)));
+ } else {
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(0));
+ break;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ break;
+ case ISD::FNEG:
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
+ // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
+ Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+ Node->getOperand(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FABS:
+ Results.push_back(ExpandFABS(Node));
+ break;
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: {
+ // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
+ ISD::CondCode Pred;
+ switch (Node->getOpcode()) {
+ default: llvm_unreachable("How did we get here?");
+ case ISD::SMAX: Pred = ISD::SETGT; break;
+ case ISD::SMIN: Pred = ISD::SETLT; break;
+ case ISD::UMAX: Pred = ISD::SETUGT; break;
+ case ISD::UMIN: Pred = ISD::SETULT; break;
+ }
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG))
+ Results.push_back(Expanded);
+ break;
+ }
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ EVT VT = Node->getValueType(0);
+    // Turn fsin / fcos into an ISD::FSINCOS node if there is a pair of fsin /
+    // fcos nodes that share the same operand and both are used.
+ if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
+ isSinCosLibcallAvailable(Node, TLI))
+ && useSinCos(Node)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
+ if (Node->getOpcode() == ISD::FCOS)
+ Tmp1 = Tmp1.getValue(1);
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::FMAD:
+ llvm_unreachable("Illegal fmad should never be formed");
+
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) != MVT::f32) {
+ // We can extend to types bigger than f32 in two steps without changing
+ // the result. Since "f16 -> f32" is much more commonly available, give
+ // CodeGen the option of emitting that before resorting to a libcall.
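+      // For example, an f16 -> f64 conversion is emitted as
+      //   (f64 fp_extend (f32 fp16_to_fp x))
+      // so only the f16 -> f32 step may need a libcall.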
+ SDValue Res =
+ DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
+ }
+ break;
+ case ISD::FP_TO_FP16:
+ LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
+ if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
+ SDValue Op = Node->getOperand(0);
+ MVT SVT = Op.getSimpleValueType();
+ if ((SVT == MVT::f64 || SVT == MVT::f80) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) {
+ // Under fastmath, we can expand this node into a fround followed by
+ // a float-half conversion.
+ SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(
+ DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal));
+ }
+ }
+ break;
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ // Check to see if this FP immediate is already legal.
+ // If this is a legal constant, turn it into a TargetConstantFP node.
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0),
+ DAG.getMachineFunction().getFunction().hasOptSize()))
+ Results.push_back(ExpandConstantFP(CFP, true));
+ break;
+ }
+ case ISD::Constant: {
+ ConstantSDNode *CP = cast<ConstantSDNode>(Node);
+ Results.push_back(ExpandConstant(CP));
+ break;
+ }
+ case ISD::FSUB: {
+ EVT VT = Node->getValueType(0);
+ if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+ const SDNodeFlags Flags = Node->getFlags();
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::SUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ "Don't know how to expand this subtraction!");
+ Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
+ VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+ break;
+ }
+ case ISD::UREM:
+ case ISD::SREM: {
+ EVT VT = Node->getValueType(0);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ Tmp2 = Node->getOperand(0);
+ Tmp3 = Node->getOperand(1);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ Results.push_back(Tmp1);
+ } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X-X/Y*Y
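+      // For example, 7 % 3 expands to 7 - (7 / 3) * 3 = 7 - 6 = 1.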
+ Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::UDIV:
+ case ISD::SDIV: {
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ EVT VT = Node->getValueType(0);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
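+    // MULHU/MULHS return only the high half of the product. Expand via the
+    // corresponding *MUL_LOHI node and take result value 1 (the high half).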
+ unsigned ExpandOpcode =
+ Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+
+ Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ }
+ case ISD::UMUL_LOHI:
+ case ISD::SMUL_LOHI: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ MVT VT = LHS.getSimpleValueType();
+ unsigned MULHOpcode =
+ Node->getOpcode() == ISD::UMUL_LOHI ? ISD::MULHU : ISD::MULHS;
+
+ if (TLI.isOperationLegalOrCustom(MULHOpcode, VT)) {
+ Results.push_back(DAG.getNode(ISD::MUL, dl, VT, LHS, RHS));
+ Results.push_back(DAG.getNode(MULHOpcode, dl, VT, LHS, RHS));
+ break;
+ }
+
+ SmallVector<SDValue, 4> Halves;
+ EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext());
+ assert(TLI.isTypeLegal(HalfType));
+ if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves,
+ HalfType, DAG,
+ TargetLowering::MulExpansionKind::Always)) {
+ for (unsigned i = 0; i < 2; ++i) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Halves[2 * i]);
+ SDValue Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Halves[2 * i + 1]);
+ SDValue Shift = DAG.getConstant(
+ HalfType.getScalarSizeInBits(), dl,
+ TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
+ }
+ break;
+ }
+ break;
+ }
+ case ISD::MUL: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+    // See if the multiply can be lowered using a two-result operation.
+ // We just need the low half of the multiply; try both the signed
+ // and unsigned forms. If the target supports both SMUL_LOHI and
+ // UMUL_LOHI, form a preference by checking which forms of plain
+ // MULH it supports.
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+ unsigned OpToUse = 0;
+ if (HasSMUL_LOHI && !HasMULHS) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI && !HasMULHU) {
+ OpToUse = ISD::UMUL_LOHI;
+ } else if (HasSMUL_LOHI) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI) {
+ OpToUse = ISD::UMUL_LOHI;
+ }
+ if (OpToUse) {
+ Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1)));
+ break;
+ }
+
+ SDValue Lo, Hi;
+ EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext());
+ if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::OR, VT) &&
+ TLI.expandMUL(Node, Lo, Hi, HalfType, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom)) {
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi);
+ SDValue Shift =
+ DAG.getConstant(HalfType.getSizeInBits(), dl,
+ TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
+ }
+ break;
+ }
+ case ISD::FSHL:
+ case ISD::FSHR:
+ if (TLI.expandFunnelShift(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (TLI.expandROT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ Results.push_back(TLI.expandAddSubSat(Node, DAG));
+ break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ Results.push_back(TLI.expandFixedPointMul(Node, DAG));
+ break;
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Carry = Node->getOperand(2);
+
+ bool IsAdd = Node->getOpcode() == ISD::ADDCARRY;
+
+ // Initial add of the 2 operands.
+ unsigned Op = IsAdd ? ISD::ADD : ISD::SUB;
+ EVT VT = LHS.getValueType();
+ SDValue Sum = DAG.getNode(Op, dl, VT, LHS, RHS);
+
+ // Initial check for overflow.
+ EVT CarryType = Node->getValueType(1);
+ EVT SetCCType = getSetCCResultType(Node->getValueType(0));
+ ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+ SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+ // Add of the sum and the carry.
+ SDValue CarryExt =
+ DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1);
+ SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt);
+
+ // Second check for overflow. If we are adding, we can only overflow if the
+    // initial sum is all 1s and the carry is set, resulting in a new sum of 0.
+ // If we are subtracting, we can only overflow if the initial sum is 0 and
+ // the carry is set, resulting in a new sum of all 1s.
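+    // Worked example for i8 ADDCARRY with LHS = 0xFF, RHS = 0x00, Carry = 1:
+    // Sum = 0xFF (Overflow false), Sum2 = 0x00, and Overflow2 is true since
+    // the carry was set and Sum2 is zero, so the carry out is 1.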
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Overflow2 =
+ IsAdd ? DAG.getSetCC(dl, SetCCType, Sum2, Zero, ISD::SETEQ)
+ : DAG.getSetCC(dl, SetCCType, Sum, Zero, ISD::SETEQ);
+ Overflow2 = DAG.getNode(ISD::AND, dl, SetCCType, Overflow2,
+ DAG.getZExtOrTrunc(Carry, dl, SetCCType));
+
+ SDValue ResultCarry =
+ DAG.getNode(ISD::OR, dl, SetCCType, Overflow, Overflow2);
+
+ Results.push_back(Sum2);
+ Results.push_back(DAG.getBoolExtOrTrunc(ResultCarry, dl, CarryType, VT));
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue Result, Overflow;
+ TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue Result, Overflow;
+ TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+ break;
+ }
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ SDValue Result, Overflow;
+ if (TLI.expandMULO(Node, Result, Overflow, DAG)) {
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+ }
+ break;
+ }
+ case ISD::BUILD_PAIR: {
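+    // Expand BUILD_PAIR(Lo, Hi) as zext(Lo) | (anyext(Hi) << half-width).
+    // For example, an i64 pair built from two i32 halves shifts Hi left by 32.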
+ EVT PairTy = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+ Tmp2 = DAG.getNode(
+ ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits() / 2, dl,
+ TLI.getShiftAmountTy(PairTy, DAG.getDataLayout())));
+ Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+ break;
+ }
+ case ISD::SELECT:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1,
+ DAG.getConstant(0, dl, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ Tmp1->setFlags(Node->getFlags());
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BR_JT: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Table = Node->getOperand(1);
+ SDValue Index = Node->getOperand(2);
+
+ const DataLayout &TD = DAG.getDataLayout();
+ EVT PTy = TLI.getPointerTy(TD);
+
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
+
+    // For power-of-two jump table entry sizes, convert the multiply to a
+    // shift.
+ // This transformation needs to be done here since otherwise the MIPS
+ // backend will end up emitting a three instruction multiply sequence
+ // instead of a single shift and MSP430 will call a runtime function.
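+    // For example, with 4-byte entries the multiply becomes Index << 2.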
+ if (llvm::isPowerOf2_32(EntrySize))
+ Index = DAG.getNode(
+ ISD::SHL, dl, Index.getValueType(), Index,
+ DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType()));
+ else
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EntrySize, dl, Index.getValueType()));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
+ Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ SDValue LD = DAG.getExtLoad(
+ ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT);
+ Addr = LD;
+ if (TLI.isJumpTableRelative()) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+ TLI.getPICJumpTableRelocBase(Table, DAG));
+ }
+
+ Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BRCOND:
+    // Expand brcond's setcc into its constituent parts and create a BR_CC
+    // node.
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ if (Tmp2.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+ Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+      // We test only the i1 bit. Skip the AND if the operand is UNDEF or is
+      // already an AND against the constant 1.
+ if (Tmp2.isUndef() ||
+ (Tmp2.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Tmp2.getOperand(1)) &&
+ cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))
+ Tmp3 = Tmp2;
+ else
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, dl, Tmp2.getValueType()));
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp3,
+ DAG.getConstant(0, dl, Tmp3.getValueType()),
+ Node->getOperand(2));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SETCC: {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, NeedInvert, dl);
+
+ if (Legalized) {
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SETCC node.
+ if (Tmp3.getNode())
+ Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Tmp3, Node->getFlags());
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0));
+
+ Results.push_back(Tmp1);
+ break;
+ }
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ int TrueValue;
+ switch (TLI.getBooleanContents(Tmp1.getValueType())) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ TrueValue = 1;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ TrueValue = -1;
+ break;
+ }
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+ DAG.getConstant(TrueValue, dl, VT),
+ DAG.getConstant(0, dl, VT),
+ Tmp3);
+ Tmp1->setFlags(Node->getFlags());
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = Node->getOperand(2); // True
+ Tmp4 = Node->getOperand(3); // False
+ EVT VT = Node->getValueType(0);
+ SDValue CC = Node->getOperand(4);
+ ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
+
+ if (TLI.isCondCodeLegalOrCustom(CCOp, Tmp1.getSimpleValueType())) {
+ // If the condition code is legal, then we need to expand this
+ // node using SETCC and SELECT.
+ EVT CmpVT = Tmp1.getValueType();
+ assert(!TLI.isOperationExpand(ISD::SELECT, VT) &&
+ "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be "
+ "expanded.");
+ EVT CCVT = getSetCCResultType(CmpVT);
+      SDValue Cond =
+          DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC, Node->getFlags());
+ Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
+ break;
+ }
+
+ // SELECT_CC is legal, so the condition code must not be.
+ bool Legalized = false;
+ // Try to legalize by inverting the condition. This is for targets that
+ // might support an ordered version of a condition, but not the unordered
+ // version (or vice versa).
+ ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,
+ Tmp1.getValueType().isInteger());
+ if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) {
+ // Use the new condition code and swap true and false
+ Legalized = true;
+ Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+ Tmp1->setFlags(Node->getFlags());
+ } else {
+      // If the inverse is not legal, then try to swap the arguments using
+      // the inverse condition code.
+ ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
+ if (TLI.isCondCodeLegalOrCustom(SwapInvCC, Tmp1.getSimpleValueType())) {
+ // The swapped inverse condition is legal, so swap true and false,
+ // lhs and rhs.
+ Legalized = true;
+ Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+ Tmp1->setFlags(Node->getFlags());
+ }
+ }
+
+ if (!Legalized) {
+ Legalized = LegalizeSetCCCondCode(
+ getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert,
+ dl);
+
+ assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
+
+ // If we expanded the SETCC by inverting the condition code, then swap
+ // the True/False operands to match.
+ if (NeedInvert)
+ std::swap(Tmp3, Tmp4);
+
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SELECT_CC node.
+ if (CC.getNode()) {
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Tmp3, Tmp4, CC);
+ } else {
+ Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
+ Tmp2, Tmp3, Tmp4, CC);
+ }
+ Tmp1->setFlags(Node->getFlags());
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BR_CC: {
+ Tmp1 = Node->getOperand(0); // Chain
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
+ Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl);
+ (void)Legalized;
+ assert(Legalized && "Can't legalize BR_CC with legal condition!");
+
+ assert(!NeedInvert && "Don't know how to invert BR_CC!");
+
+ // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
+ // node.
+ if (Tmp4.getNode()) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
+ Tmp4, Tmp2, Tmp3, Node->getOperand(4));
+ } else {
+ Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4,
+ Tmp2, Tmp3, Node->getOperand(4));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BUILD_VECTOR:
+ Results.push_back(ExpandBUILD_VECTOR(Node));
+ break;
+ case ISD::SPLAT_VECTOR:
+ Results.push_back(ExpandSPLAT_VECTOR(Node));
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+    assert(TLI.isTypeLegal(VT.getScalarType()) &&
+           "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0),
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Sh = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1),
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+
+ SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Results.push_back(TLI.expandVecReduce(Node, DAG));
+ break;
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // FIXME: Custom lowering for these operations shouldn't return null!
+ break;
+ }
+
+ if (Results.empty() && Node->isStrictFPOpcode()) {
+ // FIXME: We were asked to expand a strict floating-point operation,
+ // but there is currently no expansion implemented that would preserve
+ // the "strict" properties. For now, we just fall back to the non-strict
+ // version if that is legal on the target. The actual mutation of the
+ // operation will happen in SelectionDAGISel::DoInstructionSelection.
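+    // For example, a STRICT_FSQRT whose corresponding FSQRT is Legal is kept
+    // as-is here and mutated into the non-strict FSQRT during instruction
+    // selection.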
+ switch (Node->getOpcode()) {
+ default:
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ return true;
+ break;
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These are registered by the operand type instead of the value
+ // type. Reflect that here.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType())
+ == TargetLowering::Legal)
+ return true;
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (Results.empty()) {
+ LLVM_DEBUG(dbgs() << "Cannot expand node\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Successfully expanded node\n");
+ ReplaceNode(Node, Results.data());
+ return true;
+}
+
+void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+ LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
+ SmallVector<SDValue, 8> Results;
+ SDLoc dl(Node);
+ // FIXME: Check flags on the node to see if we can use a finite call.
+ bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath;
+ unsigned Opc = Node->getOpcode();
+ switch (Opc) {
+ case ISD::ATOMIC_FENCE: {
+    // If the target didn't lower this, lower it to a '__sync_synchronize()'
+    // call.
+ // FIXME: handle "fence singlethread" more efficiently.
+ TargetLowering::ArgListTy Args;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
+
+ std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::TRAP: {
+    // If this operation is not supported, lower it to an 'abort()' call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(
+ "abort", TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FMINNUM:
+ case ISD::STRICT_FMINNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128));
+ break;
+ case ISD::FMAXNUM:
+ case ISD::STRICT_FMAXNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128));
+ break;
+ case ISD::FSQRT:
+ case ISD::STRICT_FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FCBRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ case ISD::STRICT_FSIN:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
+ break;
+ case ISD::FCOS:
+ case ISD::STRICT_FCOS:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
+ break;
+ case ISD::FSINCOS:
+ // Expand into sincos libcall.
+ ExpandSinCosLibCall(Node, Results);
+ break;
+ case ISD::FLOG:
+ case ISD::STRICT_FLOG:
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
+ RTLIB::LOG_FINITE_F64,
+ RTLIB::LOG_FINITE_F80,
+ RTLIB::LOG_FINITE_F128,
+ RTLIB::LOG_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ case ISD::STRICT_FLOG2:
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
+ RTLIB::LOG2_FINITE_F64,
+ RTLIB::LOG2_FINITE_F80,
+ RTLIB::LOG2_FINITE_F128,
+ RTLIB::LOG2_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ case ISD::STRICT_FLOG10:
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
+ RTLIB::LOG10_FINITE_F64,
+ RTLIB::LOG10_FINITE_F80,
+ RTLIB::LOG10_FINITE_F128,
+ RTLIB::LOG10_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ case ISD::STRICT_FEXP:
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
+ RTLIB::EXP_FINITE_F64,
+ RTLIB::EXP_FINITE_F80,
+ RTLIB::EXP_FINITE_F128,
+ RTLIB::EXP_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ case ISD::STRICT_FEXP2:
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
+ RTLIB::EXP2_FINITE_F64,
+ RTLIB::EXP2_FINITE_F80,
+ RTLIB::EXP2_FINITE_F128,
+ RTLIB::EXP2_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ case ISD::STRICT_FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ case ISD::STRICT_FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ case ISD::STRICT_FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ case ISD::STRICT_FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ case ISD::STRICT_FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FROUND:
+ case ISD::STRICT_FROUND:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128));
+ break;
+ case ISD::FPOWI:
+ case ISD::STRICT_FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ case ISD::STRICT_FPOW:
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
+ RTLIB::POW_FINITE_F64,
+ RTLIB::POW_FINITE_F80,
+ RTLIB::POW_FINITE_F128,
+ RTLIB::POW_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
+ break;
+ case ISD::LROUND:
+ case ISD::STRICT_LROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128));
+ break;
+ case ISD::LLROUND:
+ case ISD::STRICT_LLROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128));
+ break;
+ case ISD::LRINT:
+ case ISD::STRICT_LRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128));
+ break;
+ case ISD::LLRINT:
+ case ISD::STRICT_LLRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ case ISD::STRICT_FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
+ break;
+ case ISD::FMA:
+ case ISD::STRICT_FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128));
+ break;
+ case ISD::FADD:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+ break;
+ case ISD::FMUL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
+ break;
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) == MVT::f32) {
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ }
+ break;
+ case ISD::FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
+ Results.push_back(ExpandLibCall(LC, Node, false));
+ break;
+ }
+ case ISD::FSUB:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
+ break;
+ case ISD::SREM:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128));
+ break;
+ case ISD::UREM:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128));
+ break;
+ case ISD::SDIV:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128));
+ break;
+ case ISD::UDIV:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128));
+ break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
+ case ISD::MUL:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128));
+ break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("LibCall explicitly requested, but not available");
+ case MVT::i32:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false));
+ break;
+ case MVT::i64:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false));
+ break;
+ case MVT::i128:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false));
+ break;
+ }
+ break;
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty()) {
+ LLVM_DEBUG(dbgs() << "Successfully converted node to libcall\n");
+ ReplaceNode(Node, Results.data());
+ } else
+ LLVM_DEBUG(dbgs() << "Could not convert node to libcall\n");
+}
+
+// Determine the vector type to use in place of an original scalar element when
+// promoting equally sized vectors.
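+// For example, promoting v2i64 (EltVT i64) to v4i32 (NewEltVT i32) yields a
+// v2i32 mid-type standing in for each original i64 element.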
+static MVT getPromotedVectorElementType(const TargetLowering &TLI,
+ MVT EltVT, MVT NewEltVT) {
+ unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits();
+ MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt);
+ assert(TLI.isTypeLegal(MidVT) && "unexpected");
+ return MidVT;
+}
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ LLVM_DEBUG(dbgs() << "Trying to promote node\n");
+ SmallVector<SDValue, 8> Results;
+ MVT OVT = Node->getSimpleValueType(0);
+ if (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::SETCC ||
+ Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ OVT = Node->getOperand(0).getSimpleValueType();
+ }
+ if (Node->getOpcode() == ISD::BR_CC)
+ OVT = Node->getOperand(2).getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ SDLoc dl(Node);
+ SDValue Tmp1, Tmp2, Tmp3;
+ switch (Node->getOpcode()) {
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ // Zero extend the argument.
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
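+      // For example, promoting i8 to i32 ORs in bit 8 (0x100), so a zero
+      // input yields CTTZ == 8, the i8 bit width, as required.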
+ auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(),
+ OVT.getSizeInBits());
+ Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1,
+ DAG.getConstant(TopBit, dl, NVT));
+ }
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), dl, NVT));
+ }
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ case ISD::BITREVERSE:
+ case ISD::BSWAP: {
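+    // Promote by zero-extending, operating in the wider type, and shifting
+    // the result back down. For example, an i16 BSWAP done in i32 computes
+    // bswap(zext(x)) >> 16 before truncating.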
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(
+ ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ }
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::SINT_TO_FP, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::VAARG: {
+ SDValue Chain = Node->getOperand(0); // Get the chain.
+ SDValue Ptr = Node->getOperand(1); // Get the pointer.
+
+ unsigned TruncOp;
+ if (OVT.isVector()) {
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger()
+ && "VAARG promotion is supported only for vectors or integer types");
+ TruncOp = ISD::TRUNCATE;
+ }
+
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2),
+ Node->getConstantOperandVal(3));
+ Chain = Tmp1.getValue(1);
+
+ Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1);
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(Tmp2.getNode());
+ UpdatedNodes->insert(Chain.getNode());
+ }
+ ReplacedNode(Node);
+ break;
+ }
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ unsigned ExtOp, TruncOp;
+ if (OVT.isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger() && "Cannot promote logic operation");
+
+ switch (Node->getOpcode()) {
+ default:
+ ExtOp = ISD::ANY_EXTEND;
+ break;
+ case ISD::SDIV:
+ case ISD::SREM:
+ ExtOp = ISD::SIGN_EXTEND;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ ExtOp = ISD::ZERO_EXTEND;
+ break;
+ }
+ TruncOp = ISD::TRUNCATE;
+ }
+ // Promote each of the values to the new type.
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
+ break;
+ }
+ case ISD::UMUL_LOHI:
+ case ISD::SMUL_LOHI: {
+ // Promote to a multiply in a wider integer type.
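+    // For example, an i16 SMUL_LOHI promoted to i32 sign-extends both
+    // operands, multiplies once, and then takes Lo = trunc(Prod) and
+    // Hi = trunc(Prod >> 16).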
+ unsigned ExtOp = Node->getOpcode() == ISD::UMUL_LOHI ? ISD::ZERO_EXTEND
+ : ISD::SIGN_EXTEND;
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::MUL, dl, NVT, Tmp1, Tmp2);
+
+ auto &DL = DAG.getDataLayout();
+ unsigned OriginalSize = OVT.getScalarSizeInBits();
+ Tmp2 = DAG.getNode(
+ ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(OriginalSize, dl, TLI.getScalarShiftAmountTy(DL, NVT)));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2));
+ break;
+ }
+ case ISD::SELECT: {
+ unsigned ExtOp, TruncOp;
+ if (Node->getValueType(0).isVector() ||
+ Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else if (Node->getValueType(0).isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ Tmp1 = Node->getOperand(0);
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ // Perform the larger operation, then round down.
+ Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3);
+ Tmp1->setFlags(Node->getFlags());
+ if (TruncOp != ISD::FP_ROUND)
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+ else
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SETCC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1,
+ Tmp2, Node->getOperand(2), Node->getFlags()));
+ break;
+ }
+ case ISD::BR_CC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(1))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3));
+ Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0),
+ Node->getOperand(0), Node->getOperand(1),
+ Tmp1, Tmp2, Node->getOperand(4)));
+ break;
+ }
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FPOW:
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
+ Node->getFlags());
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0, dl)));
+ break;
+ case ISD::FMA:
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
+ DAG.getIntPtrConstant(0, dl)));
+ break;
+ case ISD::FCOPYSIGN:
+ case ISD::FPOWI: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+
+ // fcopysign doesn't change anything but the sign bit, so
+ // (fp_round (fcopysign (fpext a), b))
+ // is as precise as
+ // (fp_round (fpext a))
+ // which is a no-op. Mark it as a TRUNCating FP_ROUND.
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
+ break;
+ }
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FTRUNC:
+ case ISD::FNEG:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FABS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp2, DAG.getIntPtrConstant(0, dl)));
+ break;
+ case ISD::BUILD_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = build_vector i64:x, i64:y => v4i32
+ // =>
+ // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for build_vector");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
+ SDValue Op = Node->getOperand(I);
+ NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op));
+ }
+
+ SDLoc SL(Node);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size.
+ //
+    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ //
+ // i64 = bitcast
+    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
+    //                       (i32 (extract_vector_elt castx, (2 * y + 1))))
+ //
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for extract_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Idx = Node->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+ SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVec, TmpIdx);
+ NewOps.push_back(Elt);
+ }
+
+ SDValue NewVec = DAG.getBuildVector(MidVT, SL, NewOps);
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec));
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ // v2i32:casty = bitcast y:i64
+ //
+ // v2i64 = bitcast
+ // (v4i32 insert_vector_elt
+ // (v4i32 insert_vector_elt v4i32:castx,
+ // (extract_vector_elt casty, 0), 2 * z),
+ // (extract_vector_elt casty, 1), (2 * z + 1))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for insert_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Val = Node->getOperand(1);
+ SDValue Idx = Node->getOperand(2);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+
+    SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+
+ SDValue NewVec = CastVec;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVal, IdxOffset);
+
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT,
+ NewVec, Elt, InEltIdx);
+ }
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+    // Handle bitcasts to a different vector type with the same total bit
+    // size.
+ //
+ // e.g. v2i64 = scalar_to_vector x:i64
+ // =>
+ // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef)
+ //
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ SDValue Val = Node->getOperand(0);
+ SDLoc SL(Node);
+
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+ SDValue Undef = DAG.getUNDEF(MidVT);
+
+ SmallVector<SDValue, 8> NewElts;
+ NewElts.push_back(CastVal);
+ for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I)
+ NewElts.push_back(Undef);
+
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
+ case ISD::ATOMIC_SWAP: {
+ AtomicSDNode *AM = cast<AtomicSDNode>(Node);
+ SDLoc SL(Node);
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal());
+ assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
+ "unexpected promotion type");
+ assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
+ "unexpected atomic_swap with illegal type");
+
+ SDValue NewAtomic
+ = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT,
+ DAG.getVTList(NVT, MVT::Other),
+ { AM->getChain(), AM->getBasePtr(), CastVal },
+ AM->getMemOperand());
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
+ Results.push_back(NewAtomic.getValue(1));
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty()) {
+ LLVM_DEBUG(dbgs() << "Successfully promoted node\n");
+ ReplaceNode(Node, Results.data());
+ } else
+ LLVM_DEBUG(dbgs() << "Could not promote node\n");
+}
+
+/// This is the entry point for the file: legalize every operation in the DAG.
+void SelectionDAG::Legalize() {
+ AssignTopologicalOrder();
+
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ // Use a delete listener to remove nodes which were deleted during
+  // legalization from LegalizedNodes. This is needed to handle the situation
+  // where a new node is allocated by the object pool at the same address as a
+ // previously deleted node.
+ DAGNodeDeletedListener DeleteListener(
+ *this,
+ [&LegalizedNodes](SDNode *N, SDNode *E) { LegalizedNodes.erase(N); });
+
+ SelectionDAGLegalize Legalizer(*this, LegalizedNodes);
+
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
+ while (true) {
+ bool AnyLegalized = false;
+ for (auto NI = allnodes_end(); NI != allnodes_begin();) {
+ --NI;
+
+ SDNode *N = &*NI;
+ if (N->use_empty() && N != getRoot().getNode()) {
+ ++NI;
+ DeleteNode(N);
+ continue;
+ }
+
+ if (LegalizedNodes.insert(N).second) {
+ AnyLegalized = true;
+ Legalizer.LegalizeOp(N);
+
+ if (N->use_empty() && N != getRoot().getNode()) {
+ ++NI;
+ DeleteNode(N);
+ }
+ }
+ }
+ if (!AnyLegalized)
+ break;
+  }
+
+ // Remove dead nodes now.
+ RemoveDeadNodes();
+}
+
+bool SelectionDAG::LegalizeOp(SDNode *N,
+ SmallSetVector<SDNode *, 16> &UpdatedNodes) {
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes);
+
+ // Directly insert the node in question, and legalize it. This will recurse
+ // as needed through operands.
+ LegalizedNodes.insert(N);
+ Legalizer.LegalizeOp(N);
+
+ return LegalizedNodes.count(N);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 0000000000000..72d052473f115
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,2330 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float". For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
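+// For example, softening an f32 FADD produces a call to the f32 addition
+// libcall, with i32 values carrying the operand and result bit patterns.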
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
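+/// For example, passing MVT::f80 selects Call_F80; types with no matching
+/// entry yield RTLIB::UNKNOWN_LIBCALL.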
+static RTLIB::Libcall GetFPLibCall(EVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::f128 ? Call_F128 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Convert Float Results to Integer
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften the result of this operator!");
+
+  case ISD::MERGE_VALUES: R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
+ case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
+ case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
+ case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
+ case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
+ case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+ case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
+ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;
+ case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
+ case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode()) {
+ assert(R.getNode() != N);
+ SetSoftenedFloat(SDValue(N, ResNo), R);
+ }
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return BitConvertToInteger(Op);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+ // Convert the inputs to integers, and build a new pair out of them.
+ return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)),
+ BitConvertToInteger(N->getOperand(0)),
+ BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ // In ppcf128, the high 64 bits are always first in memory regardless
+ // of endianness. LLVM's APFloat representation is not endian-sensitive,
+ // and so always converts into a 128-bit APInt in a non-endian-sensitive
+ // way. However, APInts are serialized in an endian-sensitive fashion,
+ // so on big-endian targets the two doubles are output in the wrong
+ // order. Fix this by manually flipping the order of the high 64 bits
+ // and the low 64 bits here.
+ if (DAG.getDataLayout().isBigEndian() &&
+ CN->getValueType(0).getSimpleVT() == llvm::MVT::ppcf128) {
+ uint64_t words[2] = { CN->getValueAPF().bitcastToAPInt().getRawData()[1],
+ CN->getValueAPF().bitcastToAPInt().getRawData()[0] };
+ APInt Val(128, words);
+ return DAG.getConstant(Val, SDLoc(CN),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ CN->getValueType(0)));
+ } else {
+ return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ CN->getValueType(0)));
+ }
+}
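+
+// Worked example (illustrative): if the constant's raw APInt words are
+// { W0, W1 }, the branch above rebuilds them as APInt(128, { W1, W0 }) on
+// big-endian targets, so the high double of the ppcf128 pair is serialized
+// first, matching its in-memory layout.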
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ NewOp.getValueType().getVectorElementType(),
+ NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Size = NVT.getSizeInBits();
+
+ // Mask = ~(1 << (Size-1))
+ APInt API = APInt::getAllOnesValue(Size);
+ API.clearBit(Size - 1);
+ SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask);
+}
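+
+// For example (illustrative): with NVT == i32 the mask is 0x7FFFFFFF, so a
+// softened f32 fabs(x) becomes (x.bits & 0x7FFFFFFF), which just clears the
+// sign bit.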
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32,
+ RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80,
+ RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32,
+ RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80,
+ RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ SDLoc dl(N);
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of second operand.
+ SDValue SignBit = DAG.getNode(
+ ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT),
+ DAG.getConstant(RSize - 1, dl,
+ TLI.getShiftAmountTy(RVT, DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // If the two operands have different types, shift the sign bit right and
+ // truncate it, or any-extend it and shift it left, so that it lines up
+ // with the sign bit of the destination type.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit =
+ DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit =
+ DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(
+ ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT),
+ DAG.getConstant(LSize - 1, dl,
+ TLI.getShiftAmountTy(LVT, DAG.getDataLayout())));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
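+
+// Worked example (illustrative): for copysign(f64 x, f32 y), LSize == 64 and
+// RSize == 32, so SizeDiff == -32. The i32 sign bit (bit 31) is any-extended
+// to i64 and shifted left by 32 to land on bit 63, then ORed into
+// (x.bits & 0x7FFFFFFFFFFFFFFF).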
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)),
+ GetSoftenedFloat(N->getOperand(2)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[3] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType(),
+ N->getOperand(2).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ EVT FloatVT = N->getValueType(0);
+ if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) {
+ // Expand Y = FNEG(X) -> Y = X ^ sign mask
+ APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)),
+ DAG.getConstant(SignMask, dl, NVT));
+ }
+
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
+ GetSoftenedFloat(N->getOperand(0)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, CallOptions, dl).first;
+}
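+
+// For example (illustrative): a softened f32 FNEG becomes
+// x.bits ^ 0x80000000, a plain sign-bit flip; other types (e.g. f80) fall
+// back to the (-0.0 - x) libcall above.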
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+
+ // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
+ // entirely possible for both f16 and f32 to be legal, so use the fully
+ // hard-float FP_EXTEND rather than FP16_TO_FP.
+ if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
+ Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
+ if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat)
+ AddToWorklist(Op.getNode());
+ }
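+ // For example (illustrative): extending f16 to f64 goes f16 -> f32 first
+ // (e.g. via __extendhfsf2 once softened) and then uses the f32 -> f64
+ // libcall chosen below (e.g. __extendsfdf2).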
+
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
+ Op = GetPromotedFloat(Op);
+ // If the promotion did the FP_EXTEND to the destination type for us,
+ // there's nothing left to do here.
+ if (Op.getValueType() == N->getValueType(0)) {
+ return BitConvertToInteger(Op);
+ }
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
+ EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
+ SDValue Op = N->getOperand(0);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
+ CallOptions, SDLoc(N)).first;
+ if (N->getValueType(0) == MVT::f32)
+ return Res32;
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f16) {
+ // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a
+ // storage-only type get a chance to select things.
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ "Unsupported power type!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ if (N->getValueType(0) == MVT::f16)
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ auto MMOFlags =
+ L->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ SDValue NewL;
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+ NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl,
+ L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags,
+ L->getAAInfo());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ if (N != NewL.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ // Do a non-extending load followed by FP_EXTEND.
+ NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(),
+ dl, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(),
+ MMOFlags, L->getAAInfo());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL);
+ return BitConvertToInteger(ExtendNode);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ SDValue NewVAARG;
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ if (N != NewVAARG.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(N);
+
+ // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to
+ // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly
+ // match. Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+ NVT = (MVT::SimpleValueType)t;
+ // The type needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(Signed);
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC,
+ TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ Op, CallOptions, dl).first;
+}
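+
+// Worked example (illustrative): i8 -> f32 has no direct libcall, so the loop
+// above settles on i32; the operand is sign- or zero-extended and e.g.
+// __floatsisf (RTLIB::SINTTOFP_I32_F32) is called.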
+
+
+//===----------------------------------------------------------------------===//
+// Convert Float Operand to Integer
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften this operator's operand!");
+
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
+ case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
+ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break;
+ case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break;
+ case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break;
+ case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break;
+ case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this so it can re-analyze the node.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand promotion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ SDValue Op0 = GetSoftenedFloat(N->getOperand(0));
+
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
+ // If we get here, the result must be legal but the source illegal.
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+
+ if (SVT == MVT::f16)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op);
+
+ RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ // We also handle the partially-softened FP_TO_FP16 node here, which
+ // returns an i16 and so does not meet the constraints required of FP_ROUND.
+ assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16);
+
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(2), N->getOperand(3));
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(N);
+
+ // If the result is not legal, eg: fp -> i1, then it needs to be promoted to
+ // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly
+ // match, eg. we don't have fp -> i8 conversions.
+ // Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ NVT = (MVT::SimpleValueType)IntVT;
+ // The type needs to be big enough to hold the result.
+ if (NVT.bitsGE(RVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first;
+
+ // Truncate the result if the libcall returns a larger type.
+ return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
+}
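+
+// Worked example (illustrative): f32 -> i8 has no direct libcall, so the loop
+// above picks i32, calls e.g. __fixsfsi (RTLIB::FPTOSINT_F32_I32), and the
+// i32 result is truncated back to i8.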
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(0), N->getOperand(1));
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(0), N->getOperand(1));
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (!NewRHS.getNode()) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ SDLoc dl(N);
+
+ if (ST->isTruncatingStore())
+ // Do an FP_ROUND followed by a non-truncating store.
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+ Val, DAG.getIntPtrConstant(0, dl)));
+ else
+ Val = GetSoftenedFloat(Val);
+
+ return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+ ST->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64,
+ RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64,
+ RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64,
+ RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64,
+ RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128),
+ NVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+ case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
+ case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
+ case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
+ case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+ case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break;
+ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.getSizeInBits() == 64 &&
+ "Do not know how to expand this float constant!");
+ APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+ SDLoc dl(N);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(64, C.getRawData()[1])),
+ dl, NVT);
+ Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(64, C.getRawData()[0])),
+ dl, NVT);
+}
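+
+// Note (illustrative): the two f64 halves come from the 128-bit constant's
+// raw words; here getRawData()[0] holds the bits of the high double of the
+// ppcf128 pair and getRawData()[1] the low double.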
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDLoc dl(N);
+ SDValue Tmp;
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+ Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+ // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+ Lo = DAG.getSelectCC(dl, Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+ ISD::SETEQ);
+}
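+
+// Rationale (illustrative): a ppcf128 value is Hi + Lo with |Lo| much smaller
+// than |Hi|, so fabs flips the sign of the pair exactly when it flips Hi; the
+// select above therefore negates Lo only when fabs changed Hi.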
+
+void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COPYSIGN_F32,
+ RTLIB::COPYSIGN_F64,
+ RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_F128,
+ RTLIB::COPYSIGN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, CallOptions,
+ SDLoc(N)).first;
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, CallOptions,
+ SDLoc(N)).first;
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, CallOptions,
+ SDLoc(N)).first;
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0));
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+}
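+
+// Rationale (illustrative): extending a smaller float (e.g. f64) to ppcf128
+// is exact, so the high double carries the whole value and the low double is
+// +0.0.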
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, CallOptions,
+ SDLoc(N)).first;
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getMemoryVT(), LD->getMemOperand());
+
+ // Remember the chain.
+ Chain = Hi.getValue(1);
+
+ // The low part is zero.
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+
+ // Modified the chain - switch anything that used the old chain to use the
+ // new one.
+ ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ SDLoc dl(N);
+
+ // First do an SINT_TO_FP, whether the original was signed or unsigned.
+ // When promoting partial word types to i32 we must honor the signedness,
+ // though.
+ if (SrcVT.bitsLE(MVT::i32)) {
+ // The integer can be represented exactly in an f64.
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i32, Src);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+ Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ } else {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (SrcVT.bitsLE(MVT::i64)) {
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i64, Src);
+ LC = RTLIB::SINTTOFP_I64_PPCF128;
+ } else if (SrcVT.bitsLE(MVT::i128)) {
+ Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+ LC = RTLIB::SINTTOFP_I128_PPCF128;
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first;
+ GetPairElements(Hi, Lo, Hi);
+ }
+
+ if (isSigned)
+ return;
+
+ // Unsigned - fix up the SINT_TO_FP value just calculated.
+ Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+ SrcVT = Src.getValueType();
+
+ // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
+ static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
+ static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+ ArrayRef<uint64_t> Parts;
+
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unsupported UINT_TO_FP!");
+ case MVT::i32:
+ Parts = TwoE32;
+ break;
+ case MVT::i64:
+ Parts = TwoE64;
+ break;
+ case MVT::i128:
+ Parts = TwoE128;
+ break;
+ }
+
+ // TODO: Are there fast-math-flags to propagate to this FADD?
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+ DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble(),
+ APInt(128, Parts)),
+ dl, MVT::ppcf128));
+ Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT),
+ Lo, Hi, ISD::SETLT);
+ GetPairElements(Lo, Lo, Hi);
+}
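+
+// Note on the constants above (illustrative): 0x41f0000000000000,
+// 0x43f0000000000000 and 0x47f0000000000000 are the f64 bit patterns of 2^32,
+// 2^64 and 2^128. Adding 2^N corrects the signed reinterpretation of an
+// unsigned iN source whose top bit was set, i.e. when (iN)x compared as
+// negative in the select above.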
+
+
+//===----------------------------------------------------------------------===//
+// Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion. At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break;
+ case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break;
+ case ISD::LRINT: Res = ExpandFloatOp_LRINT(N); break;
+ case ISD::LLRINT: Res = ExpandFloatOp_LLRINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+ GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+ assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!");
+
+ // FIXME: This generated code sucks. We want to generate
+ // FCMPU crN, hi1, hi2
+ // BNE crN, L:
+ // FCMPU crN, lo1, lo2
+ // The following can be improved, but not that much.
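+  // Concretely, lhs CC rhs is expanded to:
+  //   (hi1 == hi2 && lo1 CC lo2) || (hi1 != hi2 && hi1 CC hi2)
+  // where == is SETOEQ and != is SETUNE, so NaNs in the high doubles defer to
+  // the high-part comparison.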
+ SDValue Tmp1, Tmp2, Tmp3;
+ Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETOEQ);
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, CCCode);
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETUNE);
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode);
+ Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+ NewRHS = SDValue(); // LHS is the result, not a compare.
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+  // If FloatExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(1), Lo, Hi);
+ // The ppcf128 value is providing only the sign; take it from the
+ // higher-order double (which must have the larger magnitude).
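+  // (A ppcf128 value is Hi + Lo with |Lo| at most half an ulp of Hi, so the
+  // sign of the whole value is the sign of Hi.)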
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N),
+ N->getValueType(0), N->getOperand(0), Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ SDLoc dl(N);
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ SDLoc dl(N);
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
+ CallOptions, dl).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+  // If FloatExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+  // If FloatExpandSetCCOperands returned a scalar, use it.
+ if (!NewRHS.getNode()) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ ST->getValue().getValueType());
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+ (void)NVT;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(ST->getValue(), Lo, Hi);
+
+ return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr,
+ ST->getMemoryVT(), ST->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64,
+ RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128),
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64,
+ RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128),
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64,
+ RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128),
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64,
+ RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128),
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
+}
+
+//===----------------------------------------------------------------------===//
+// Float Operand Promotion
+//===----------------------------------------------------------------------===//
+//
+
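+// Return the conversion used to move between the f16 storage type and the
+// larger type used for promoted arithmetic: extending an f16 operand uses
+// FP16_TO_FP, and narrowing a promoted value back to f16 uses FP_TO_FP16.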
+static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f16) {
+ return ISD::FP16_TO_FP;
+ } else if (RetVT == MVT::f16) {
+ return ISD::FP_TO_FP16;
+ }
+
+ report_fatal_error("Attempt at an invalid promotion-related conversion");
+}
+
+bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ return false;
+ }
+
+  // Nodes that use a promotion-requiring floating point operand, but don't
+ // produce a promotion-requiring floating point result, need to be legalized
+ // to use the promoted float operand. Nodes that produce at least one
+ // promotion-requiring floating point result have their operands legalized as
+ // a part of PromoteFloatResult.
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to promote this operator's operand!");
+
+ case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break;
+ case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
+ case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
+ case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
+ case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
+ }
+
+ if (R.getNode())
+ ReplaceValueWith(SDValue(N, 0), R);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) {
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op->getValueType(0);
+
+ SDValue Promoted = GetPromotedFloat(N->getOperand(0));
+ EVT PromotedVT = Promoted->getValueType(0);
+
+ // Convert the promoted float value to the desired IVT.
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue Convert = DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N),
+ IVT, Promoted);
+  // The final result type might not be a scalar, so we need a bitcast. The
+ // bitcast will be further legalized if needed.
+ return DAG.getBitcast(N->getValueType(0), Convert);
+}
+
+// Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by
+// PromoteFloatRes_FCOPYSIGN.
+SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "Only operand 1 needs promotion here");
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), Op1);
+}
+
+// Convert the promoted float value to the desired integer type
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ EVT VT = N->getValueType(0);
+
+ // Desired VT is same as promoted type. Use promoted float directly.
+ if (VT == Op->getValueType(0))
+ return Op;
+
+ // Else, extend the promoted float value to the desired VT.
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op);
+}
+
+// Promote the float operands used for comparison. The true- and false-
+// operands have the same type as the result and are promoted, if needed, by
+// PromoteFloatRes_SELECT_CC.
+SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ SDValue LHS = GetPromotedFloat(N->getOperand(0));
+ SDValue RHS = GetPromotedFloat(N->getOperand(1));
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
+ LHS, RHS, N->getOperand(2), N->getOperand(3),
+ N->getOperand(4));
+}
+
+// Construct a SETCC that compares the promoted values using the original
+// condition code.
+SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode);
+}
+
+// Lower the promoted float down to an integer value of the same size and
+// construct a STORE of the integer value.
+SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ SDLoc DL(N);
+
+ SDValue Promoted = GetPromotedFloat(Val);
+ EVT VT = ST->getOperand(1).getValueType();
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
+  SDValue NewVal = DAG.getNode(
+      GetPromotionOpcode(Promoted.getValueType(), VT), DL, IVT, Promoted);
+
+ return DAG.getStore(ST->getChain(), DL, NewVal, ST->getBasePtr(),
+ ST->getMemOperand());
+}
+
+//===----------------------------------------------------------------------===//
+// Float Result Promotion
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ return;
+ }
+
+ switch (N->getOpcode()) {
+ // These opcodes cannot appear if promotion of FP16 is done in the backend
+  // instead of Clang.
+ case ISD::FP16_TO_FP:
+ case ISD::FP_TO_FP16:
+ default:
+#ifndef NDEBUG
+ dbgs() << "PromoteFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to promote this operator's result!");
+
+ case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
+ case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break;
+
+ // Unary FP Operations
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break;
+
+ // Binary FP Operations
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break;
+
+  case ISD::FMA: // FMA is handled the same as FMAD
+ case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break;
+
+ case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break;
+
+ case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
+ case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ }
+
+ if (R.getNode())
+ SetPromotedFloat(SDValue(N, ResNo), R);
+}
+
+// Bitcast from i16 to f16: convert the i16 to an f32 value instead.
+// At this point, it is not possible to determine whether the bitcast value is
+// eventually stored to memory, promoted to f32, or promoted to a floating
+// point type at a higher precision. Some of these cases are handled by the
+// FP_EXTEND and STORE promotion handlers.
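+//
+// For example, assuming f16 promotes to f32, (f16 (bitcast i16:x)) becomes
+// (f32 (FP16_TO_FP i16:x)).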
+SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ // Input type isn't guaranteed to be a scalar int so bitcast if not. The
+ // bitcast will be legalized further if necessary.
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(),
+ N->getOperand(0).getValueType().getSizeInBits());
+ SDValue Cast = DAG.getBitcast(IVT, N->getOperand(0));
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, Cast);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) {
+ ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Get the (bit-cast) APInt of the APFloat and build an integer constant
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ SDValue C = DAG.getConstant(CFPNode->getValueAPF().bitcastToAPInt(), DL,
+ IVT);
+
+ // Convert the Constant to the desired FP type
+ // FIXME We might be able to do the conversion during compilation and get rid
+ // of it from the object code
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, C);
+}
+
+// If the Index operand is a constant, try to redirect the extract operation to
+// the correct legalized vector. If not, bit-convert the input vector to
+// the equivalent integer vector. Extract the element as a (bit-cast) integer
+// value and convert it to the promoted type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDLoc DL(N);
+
+ // If the index is constant, try to extract the value from the legalized
+ // vector type.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ switch (getTypeAction(VecVT)) {
+ default: break;
+ case TargetLowering::TypeScalarizeVector: {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+ case TargetLowering::TypeWidenVector: {
+ Vec = GetWidenedVector(Vec);
+ SDValue Res = DAG.getNode(N->getOpcode(), DL, EltVT, Vec, Idx);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+ case TargetLowering::TypeSplitVector: {
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ SDValue Res;
+ if (IdxVal < LoElts)
+ Res = DAG.getNode(N->getOpcode(), DL, EltVT, Lo, Idx);
+ else
+ Res = DAG.getNode(N->getOpcode(), DL, EltVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, DL,
+ Idx.getValueType()));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+    }
+ }
+
+ // Bit-convert the input vector to the equivalent integer vector
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ EVT IVT = NewOp.getValueType().getVectorElementType();
+
+  // Extract the element as a (bit-cast) integer value
+ SDValue NewVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IVT,
+ NewOp, N->getOperand(1));
+
+ // Convert the element to the desired FP type
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, NewVal);
+}
+
+// FCOPYSIGN(X, Y) returns the value of X with the sign of Y. If the result
+// needs promotion, so does the argument X. Note that Y, if needed, will be
+// handled during operand promotion.
+SDValue DAGTypeLegalizer::PromoteFloatRes_FCOPYSIGN(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+
+ SDValue Op1 = N->getOperand(1);
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
+// Unary operation where the result and the operand have PromoteFloat type
+// action. Construct a new SDNode with the promoted float value of the old
+// operand.
+SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op);
+}
+
+// Binary operations where the result and both operands have PromoteFloat type
+// action. Construct a new SDNode with the promoted float values of the old
+// operands.
+SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ SDValue Op2 = GetPromotedFloat(N->getOperand(2));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Op2);
+}
+
+// Promote the Float (first) operand and retain the Integer (second) operand
+SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = N->getOperand(1);
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
+// Explicit operation to reduce precision. Reduce the value to half precision
+// and promote it back to the legal type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
+ SDLoc DL(N);
+
+ SDValue Op = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = Op->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
+ // Round promoted float to desired precision
+ SDValue Round = DAG.getNode(GetPromotionOpcode(OpVT, VT), DL, IVT, Op);
+ // Promote it back to the legal output type
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+
+ // Load the value as an integer value with the same number of bits.
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT,
+ SDLoc(N), L->getChain(), L->getBasePtr(),
+ L->getOffset(), L->getPointerInfo(), IVT,
+ L->getAlignment(),
+ L->getMemOperand()->getFlags(),
+ L->getAAInfo());
+ // Legalize the chain result by replacing uses of the old value chain with the
+ // new one
+ ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
+
+ // Convert the integer value to the desired FP type
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL);
+}
+
+// Construct a new SELECT node with the promoted true- and false- values.
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) {
+ SDValue TrueVal = GetPromotedFloat(N->getOperand(1));
+ SDValue FalseVal = GetPromotedFloat(N->getOperand(2));
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0),
+ N->getOperand(0), TrueVal, FalseVal);
+}
+
+// Construct a new SELECT_CC node with the promoted true- and false- values.
+// The operands used for comparison are promoted by PromoteFloatOp_SELECT_CC.
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) {
+ SDValue TrueVal = GetPromotedFloat(N->getOperand(2));
+ SDValue FalseVal = GetPromotedFloat(N->getOperand(3));
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ TrueVal.getNode()->getValueType(0), N->getOperand(0),
+ N->getOperand(1), TrueVal, FalseVal, N->getOperand(4));
+}
+
+// Construct a SDNode that transforms the SINT or UINT operand to the promoted
+// float type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue NV = DAG.getNode(N->getOpcode(), DL, NVT, N->getOperand(0));
+ // Round the value to the desired precision (that of the source type).
+ return DAG.getNode(
+ ISD::FP_EXTEND, DL, NVT,
+ DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL)));
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+
+ AtomicSDNode *AM = cast<AtomicSDNode>(N);
+ SDLoc SL(N);
+
+ SDValue CastVal = BitConvertToInteger(AM->getVal());
+ EVT CastVT = CastVal.getValueType();
+
+ SDValue NewAtomic
+ = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT,
+ DAG.getVTList(CastVT, MVT::Other),
+ { AM->getChain(), AM->getBasePtr(), CastVal },
+ AM->getMemOperand());
+
+ SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
+ NewAtomic);
+ // Legalize the chain result by replacing uses of the old value chain with the
+ // new one
+ ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));
+
+ return ResultCast;
+}
+
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 0000000000000..d5c1b539adbde
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,4313 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on powerpc).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ return;
+ }
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to promote this operator!");
+  case ISD::MERGE_VALUES: Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
+ case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
+ case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
+ case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
+ case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
+ case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
+ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
+ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SMIN:
+ case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::UMIN:
+ case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+
+ case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SIGN_EXTEND_INREG:
+ Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
+ case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
+ case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
+ case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+
+ case ISD::EXTRACT_SUBVECTOR:
+ Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::BUILD_VECTOR:
+ Res = PromoteIntRes_BUILD_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ Res = PromoteIntRes_SPLAT_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS:
+ Res = PromoteIntRes_CONCAT_VECTORS(N); break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break;
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+ case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
+
+ case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+ case ISD::SDIV:
+ case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
+
+ case ISD::UDIV:
+ case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+ case ISD::UADDO:
+ case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+ case ISD::SMULO:
+ case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE:
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
+
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
+
+ case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
+
+ case ISD::ATOMIC_LOAD:
+ Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
+ break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ Res = PromoteIntRes_VECREDUCE(N);
+ break;
+ }
+
+ // If the result is null then the sub-method took care of registering it.
+ if (Res.getNode())
+ SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetPromotedInteger(Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+ // Sign-extend the new bits, and continue the assertion.
+ SDValue Op = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertSext, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+ // Zero the new bits, and continue the assertion.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertZext, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
+ EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
+ N->getMemoryVT(), ResVT,
+ N->getChain(), N->getBasePtr(),
+ N->getMemOperand());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
+ N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(),
+ Op2, N->getMemOperand());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
+ unsigned ResNo) {
+ if (ResNo == 1) {
+ assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ EVT SVT = getSetCCResultType(N->getOperand(2).getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+
+ // Only use the result of getSetCCResultType if it is legal,
+ // otherwise just use the promoted result type (NVT).
+ if (!TLI.isTypeLegal(SVT))
+ SVT = NVT;
+
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs,
+ N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3),
+ N->getMemOperand());
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
+ return Res.getValue(1);
+ }
+
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ SDVTList VTs =
+ DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
+ N->getBasePtr(), Op2, Op3, N->getMemOperand());
+ // Update the use to N with the newly created Res.
+ for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i)
+ ReplaceValueWith(SDValue(N, i), Res.getValue(i));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
+ // The input promotes to the same size. Convert the promoted value.
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case TargetLowering::TypePromoteFloat: {
+ // Convert the promoted float by hand.
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, GetPromotedFloat(InOp));
+ break;
+ }
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element to an integer and promote it by hand.
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ break;
+ case TargetLowering::TypeSplitVector: {
+ if (!NOutVT.isVector()) {
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ }
+ break;
+ }
+ case TargetLowering::TypeWidenVector:
+ // The input is widened to the same size. Convert to the widened value.
+ // Make sure that the outgoing value is not a vector, because this would
+ // make us bitcast between two vectors which are legalized in different ways.
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+ // If the output type is also a vector and widening it to the same size
+ // as the widened input type would be a legal type, we can widen the bitcast
+ // and handle the promotion after.
+ if (NOutVT.isVector()) {
+ unsigned WidenInSize = NInVT.getSizeInBits();
+ unsigned OutSize = OutVT.getSizeInBits();
+ if (WidenInSize % OutSize == 0) {
+ unsigned Scale = WidenInSize / OutSize;
+ EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(),
+ OutVT.getVectorElementType(),
+ OutVT.getVectorNumElements() * Scale);
+ if (isTypeLegal(WideOutVT)) {
+ InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
+ DAG.getConstant(0, dl, IdxTy));
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp);
+ }
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ CreateStackStoreLoad(InOp, OutVT));
+}
+
+// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
+// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
+static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ // If the value won't fit in the prefered type, just use something safe. It
+ // will be legalized when the shift is expanded.
+ if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
+ ShiftVT = MVT::i32;
+ return ShiftVT;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+
+ unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
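+  // For example, when an i16 BSWAP is promoted to i32, the i32 BSWAP leaves
+  // the two meaningful bytes in bits [31:16]; the SRL by DiffBits (16) moves
+  // them back down and shifts out the swapped junk from the promoted high
+  // bits.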
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl, ShiftVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+
+ unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ return DAG.getNode(ISD::SRL, dl, NVT,
+ DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl, ShiftVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+ // The pair element type may be legal, or may not promote to the same type as
+ // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
+ return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)), JoinIntegers(N->getOperand(0),
+ N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ // FIXME there is no actual debug info here
+ SDLoc dl(N);
+ // Zero extend things like i1, sign extend everything else. It shouldn't
+ // matter in theory which one we pick, but this tends to give better code?
+ unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Result = DAG.getNode(Opc, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+ SDValue(N, 0));
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ SDLoc dl(N);
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ // Subtract off the extra leading bits in the bigger type.
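+  // For example, an i8 value zero-extended to i32 gains 24 extra leading
+  // zeros, so NVT bits - OVT bits = 24 is subtracted from the i32 count.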
+ return DAG.getNode(
+ ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl,
+ NVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
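+    // For example, an i8 promoted to i32 is ORed with 0x100: a zero i8 input
+    // then yields a count of 8 (its bit width), independent of whatever the
+    // undefined promoted high bits contain.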
+ auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
+ OVT.getScalarSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // If the input also needs to be promoted, do that first so we can get a
+  // good idea of the output type.
+ if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue In = GetPromotedInteger(Op0);
+
+ // If the new type is larger than NVT, use it. We probably won't need to
+ // promote it again.
+ EVT SVT = In.getValueType().getScalarType();
+ if (SVT.bitsGE(NVT)) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1);
+ return DAG.getAnyExtOrTrunc(Ext, dl, NVT);
+ }
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NewOpc = N->getOpcode();
+ SDLoc dl(N);
+
+ // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+ // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
+ // and SINT conversions are Custom, there is no way to tell which is
+ // preferable. We choose SINT because that's the right thing on PPC.)
+ if (N->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+ NewOpc = ISD::STRICT_FP_TO_SINT;
+
+ SDValue Res;
+ if (N->isStrictFPOpcode()) {
+ Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other },
+ { N->getOperand(0), N->getOperand(1) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ } else
+ Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (eg: because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ //
+ // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example:
+ // before legalization: fp-to-uint16, 65534. -> 0xfffe
+ // after legalization: fp-to-sint32, 65534. -> 0x0000fffe
+ return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_UINT) ?
+ ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+ DAG.getValueType(N->getValueType(0).getScalarType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ return DAG.getNode(N->getOpcode(), dl, NVT);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+ // If the result and operand types are the same after promotion, simplify
+ // to an in-register extension.
+ if (NVT == Res.getValueType()) {
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(N->getOperand(0).getValueType()));
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendInReg(Res, dl,
+ N->getOperand(0).getValueType().getScalarType());
+ assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+ return Res;
+ }
+ }
+
+ // Otherwise, just extend the original operand all the way to the larger type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ISD::LoadExtType ExtType =
+ ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+ SDLoc dl(N);
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getMemoryVT(), N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+
+ SDLoc dl(N);
+ SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
+ N->getMask(), ExtPassThru, N->getMemoryVT(),
+ N->getMemOperand(), ISD::EXTLOAD);
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+ assert(NVT == ExtPassThru.getValueType() &&
+ "Gather result type and the passThru agrument type should be the same");
+
+ SDLoc dl(N);
+ SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
+ N->getIndex(), N->getScale() };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand(), N->getIndexType());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+/// Promote the overflow flag of an overflowing arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+ // Change the return type of the boolean result while obeying
+ // getSetCCResultType.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT VT = N->getValueType(0);
+ EVT SVT = getSetCCResultType(VT);
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) };
+ unsigned NumOps = N->getNumOperands();
+ assert(NumOps <= 3 && "Too many operands");
+ if (NumOps == 3)
+ Ops[2] = N->getOperand(2);
+
+ SDLoc dl(N);
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
+ makeArrayRef(Ops, NumOps));
+
+ // Modified the sum result - switch anything that used the old sum to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 0), Res);
+
+ // Convert to the expected type.
+ return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
+ // If the promoted type is legal, we can convert this to:
+ // 1. ANY_EXTEND iN to iM
+ // 2. SHL by M-N
+ // 3. [US][ADD|SUB]SAT
+ // 4. L/ASHR by M-N
+ // Else it is more efficient to convert this to a min and a max
+ // operation in the higher precision arithmetic.
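+  // The shift trick works because SHL by M-N places the iN operands in the
+  // top bits of the iM registers, making the iM saturation bounds coincide
+  // with the shifted iN bounds; the final SRA (signed) or SRL (unsigned)
+  // moves the saturated result back down.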
+ SDLoc dl(N);
+ SDValue Op1 = N->getOperand(0);
+ SDValue Op2 = N->getOperand(1);
+ unsigned OldBits = Op1.getScalarValueSizeInBits();
+
+ unsigned Opcode = N->getOpcode();
+
+ SDValue Op1Promoted, Op2Promoted;
+ if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
+ Op1Promoted = ZExtPromotedInteger(Op1);
+ Op2Promoted = ZExtPromotedInteger(Op2);
+ } else {
+ Op1Promoted = SExtPromotedInteger(Op1);
+ Op2Promoted = SExtPromotedInteger(Op2);
+ }
+ EVT PromotedType = Op1Promoted.getValueType();
+ unsigned NewBits = PromotedType.getScalarSizeInBits();
+
+ if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+ unsigned ShiftOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ ShiftOp = ISD::SRA;
+ break;
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ ShiftOp = ISD::SRL;
+ break;
+ default:
+ llvm_unreachable("Expected opcode to be signed or unsigned saturation "
+ "addition or subtraction");
+ }
+
+ unsigned SHLAmount = NewBits - OldBits;
+ EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
+ Op2Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+
+ SDValue Result =
+ DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+ } else {
+ if (Opcode == ISD::USUBSAT) {
+ SDValue Max =
+ DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted);
+ }
+
+ if (Opcode == ISD::UADDSAT) {
+ APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Add =
+ DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
+ }
+
+ unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
+ APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
+ APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Result =
+ DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
+ Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
+ Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
+ return Result;
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
+ // Can just promote the operands then continue with operation.
+ SDLoc dl(N);
+ SDValue Op1Promoted, Op2Promoted;
+ bool Signed =
+ N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
+ bool Saturating =
+ N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT;
+ if (Signed) {
+ Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ } else {
+ Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
+ }
+ EVT OldType = N->getOperand(0).getValueType();
+ EVT PromotedType = Op1Promoted.getValueType();
+ unsigned DiffSize =
+ PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
+
+ if (Saturating) {
+ // Promoting the operand and result values changes the saturation width,
+    // which extends the values that we clamp to on saturation. This could be
+ // resolved by shifting one of the operands the same amount, which would
+ // also shift the result we compare against, then shifting back.
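+    // For example, promoting i8 SMULFIXSAT to i32 gives DiffSize = 24:
+    // shifting Op1 left by 24 scales the product by 2^24, so the i32
+    // saturation limits, shifted back right by 24, match the i8 limits
+    // exactly.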
+ EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getConstant(DiffSize, dl, ShiftTy));
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result,
+ DAG.getConstant(DiffSize, dl, ShiftTy));
+ }
+ return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
+ N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // sign extension of its truncation to the original type.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ SDLoc dl(N);
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: sign extend the arithmetic result from
+ // the original type.
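+  // E.g. for an i8 SADDO promoted to i32: 100 + 100 = 200, and sign
+  // extending the low 8 bits of 200 gives -56 != 200, so the i8 addition
+  // overflowed.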
+ SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(OVT));
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
+ SDValue Mask = N->getOperand(0);
+
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::VSELECT, SDLoc(N),
+ LHS.getValueType(), Mask, LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(2));
+ SDValue RHS = GetPromotedInteger(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ EVT SVT = getSetCCResultType(InVT);
+
+  // If we got back a type that needs to be promoted, this likely means that
+  // the input type also needs to be promoted. So get the promoted type for
+ // the input and try the query again.
+ if (getTypeAction(SVT) == TargetLowering::TypePromoteInteger) {
+ if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
+ InVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ SVT = getSetCCResultType(InVT);
+ } else {
+ // Input type isn't promoted, just use the default promoted type.
+ SVT = NVT;
+ }
+ }
+
+ SDLoc dl(N);
+ assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
+ "Vector compare must return a vector result!");
+
+ // Get the SETCC result using the canonical SETCC type.
+ SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+
+ // Convert to the expected type.
+ return DAG.getSExtOrTrunc(SetCC, dl, NVT);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = GetPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ // The input value must be properly sign extended.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ // The input value must be properly zero extended.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res;
+ SDValue InOp = N->getOperand(0);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InOp.getValueType())) {
+ default: llvm_unreachable("Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypeExpandInteger:
+ Res = InOp;
+ break;
+ case TargetLowering::TypePromoteInteger:
+ Res = GetPromotedInteger(InOp);
+ break;
+ case TargetLowering::TypeSplitVector: {
+ EVT InVT = InOp.getValueType();
+ assert(InVT.isVector() && "Cannot split scalar types");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert(NumElts == NVT.getVectorNumElements() &&
+ "Dst and Src must have the same number of elements");
+ assert(isPowerOf2_32(NumElts) &&
+ "Promoted vector type must be a power of two");
+
+ SDValue EOp1, EOp2;
+ GetSplitVector(InOp, EOp1, EOp2);
+
+ EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+ NumElts/2);
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
+ }
+ case TargetLowering::TypeWidenVector: {
+ SDValue WideInOp = GetWidenedVector(InOp);
+
+ // Truncate widened InOp.
+ unsigned NumElem = WideInOp.getValueType().getVectorNumElements();
+ EVT TruncVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getValueType(0).getScalarType(), NumElem);
+ SDValue WideTrunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, WideInOp);
+
+    // Zero extend so that the elements are of the same type as those of NVT.
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), NVT.getVectorElementType(),
+ NumElem);
+ SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc);
+
+ // Extract the low NVT subvector.
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, dl, IdxTy);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx);
+ }
+ }
+
+ // Truncate to NVT instead of VT
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // zero extension of its truncation to the original type.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ SDLoc dl(N);
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: zero extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT.getScalarType());
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
+// the third operand of ADDE/SUBE nodes is a carry flag (MVT::Glue), whereas
+// the third operand of ADDCARRY/SUBCARRY nodes is a carry Boolean.
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // We need to sign-extend the operands so the carry value computed by the
+ // wide operation will be equivalent to the carry value computed by the
+ // narrow operation.
+ // An ADDCARRY can generate carry only if any of the operands has its
+ // most significant bit set. Sign extension propagates the most significant
+ // bit into the higher bits which means the extra bit that the narrow
+ // addition would need (i.e. the carry) will be propagated through the higher
+ // bits of the wide addition.
+ // A SUBCARRY can generate borrow only if LHS < RHS and this property will be
+ // preserved by sign extension.
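+  // E.g. for i8 promoted to i32: 0xFF + 0x01 carries out of 8 bits, and the
+  // sign-extended 0xFFFFFFFF + 0x00000001 likewise carries out of 32 bits.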
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+
+ EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)};
+
+ // Do the arithmetic in the wide type.
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs),
+ LHS, RHS, N->getOperand(2));
+
+ // Update the users of the original carry/borrow value.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ return SDValue(Res.getNode(), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
+ SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ SDLoc DL(N);
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine if the result overflowed in a larger type, we extend the
+ // input to the larger type, do the multiply (checking if it overflows),
+ // then also check the high bits of the result to see if overflow happened
+ // there.
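+  // E.g. for an i8 UMULO promoted to i32: 16 * 16 = 256 fits in i32 without
+  // overflowing, but the high 24 bits of the result are non-zero, so the i8
+  // multiply overflowed.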
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1));
+ SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS);
+
+ // Overflow occurred if it occurred in the larger type, or if the high part
+ // of the result does not zero/sign-extend the low part. Check this second
+ // possibility first.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred if the high part is non-zero.
+ unsigned Shift = SmallVT.getScalarSizeInBits();
+ EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(),
+ TLI, DAG);
+ SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getConstant(Shift, DL, ShiftTy));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, DL, Hi.getValueType()),
+ ISD::SETNE);
+ } else {
+ // Signed overflow occurred if the high part does not sign extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // The only other way for overflow to occur is if the multiplication in the
+ // larger type itself overflowed.
+ Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow,
+ SDValue(Mul.getNode(), 1));
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
+ // The argument is passed as NumRegs registers of type RegVT.
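+  // The parts are reassembled below by zero-extending each part and OR'ing
+  // it in at its offset: Res |= (zext Parts[i]) << (i * RegVT bits).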
+
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+ Chain = Parts[i].getValue(1);
+ }
+
+ // Handle endianness of the load.
+ if (DAG.getDataLayout().isBigEndian())
+ std::reverse(Parts.begin(), Parts.end());
+
+ // Assemble the parts in the promoted type.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+ for (unsigned i = 1; i < NumRegs; ++i) {
+ SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+ // Shift it to the right position and "or" it in.
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+ DAG.getConstant(i * RegVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+ }
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ return false;
+ }
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to promote this operator's operand!");
+
+ case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::ATOMIC_STORE:
+ Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
+ break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
+ case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
+ case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ Res = PromoteIntOp_SPLAT_VECTOR(N); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
+ case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
+ OpNo); break;
+ case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
+ OpNo); break;
+ case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
+ OpNo); break;
+ case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
+ OpNo); break;
+ case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::FP16_TO_FP:
+ case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
+
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
+
+ case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
+
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break;
+
+ case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions. For those operations where either
+ // zero or sign extension would be valid, use SExtOrZExtPromotedInteger
+ // which will choose the cheapest for the target.
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer comparison!");
+ case ISD::SETEQ:
+ case ISD::SETNE: {
+ SDValue OpL = GetPromotedInteger(NewLHS);
+ SDValue OpR = GetPromotedInteger(NewRHS);
+
+    // We would prefer to promote the comparison operands with sign extension.
+    // If the width of OpL/OpR excluding the duplicated sign bits is no greater
+    // than the width of NewLHS/NewRHS, we can avoid inserting a real truncate
+    // instruction, which would eventually be redundant.
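+    // E.g. two i8 values sign extended to i32 have at least 25 sign bits, so
+    // their effective width is 32 - 25 + 1 = 8 <= 8 and they can be compared
+    // directly in the promoted type.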
+ unsigned OpLEffectiveBits =
+ OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
+ unsigned OpREffectiveBits =
+ OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
+ if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
+ OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
+ NewLHS = OpL;
+ NewRHS = OpR;
+ } else {
+ NewLHS = SExtOrZExtPromotedInteger(NewLHS);
+ NewRHS = SExtOrZExtPromotedInteger(NewRHS);
+ }
+ break;
+ }
+ case ISD::SETUGE:
+ case ISD::SETUGT:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ NewLHS = SExtOrZExtPromotedInteger(NewLHS);
+ NewRHS = SExtOrZExtPromotedInteger(NewRHS);
+ break;
+ case ISD::SETGE:
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ NewLHS = SExtPromotedInteger(NewLHS);
+ NewRHS = SExtPromotedInteger(NewRHS);
+ break;
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(), Op2, N->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+  // This should only occur in unusual situations like bitcasting to an
+  // x86_fp80, so just turn it into a store+load.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+ // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
+ // legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other);
+
+ // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+ // Since the result type is legal, the operands must promote to it.
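+  // The pair is reassembled as Lo | (Hi << OVT bits); e.g. an i16 result
+  // built from two i8 halves is Lo | (Hi << 8).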
+ EVT OVT = N->getOperand(0).getValueType();
+ SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Hi = GetPromotedInteger(N->getOperand(1));
+ assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+ SDLoc dl(N);
+
+ Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+ DAG.getConstant(OVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type is not. This implies
+ // that the vector is a power-of-two in length and that the element
+  // type does not have a strange size (e.g. it is not i1).
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
+ "Legal vector of one illegal element?");
+
+  // Promote the element values. Their promoted type does not need to match
+  // the vector element type. Check that any extra bits introduced will be
+  // truncated away.
+ assert(N->getOperand(0).getValueSizeInBits() >=
+ N->getValueType(0).getScalarSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+
+ SmallVector<SDValue, 16> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i)
+ NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+ unsigned OpNo) {
+ if (OpNo == 1) {
+ // Promote the inserted value. This is valid because the type does not
+ // have to match the vector element type.
+
+ // Check that any extra bits introduced will be truncated away.
+ assert(N->getOperand(1).getValueSizeInBits() >=
+ N->getValueType(0).getScalarSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ GetPromotedInteger(N->getOperand(1)),
+ N->getOperand(2)),
+ 0);
+ }
+
+ assert(OpNo == 2 && "Different operand and result vector types?");
+
+ // Promote the index.
+ SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), Idx), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+ // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+ // the operand in place.
+ return SDValue(DAG.UpdateNodeOperands(N,
+ GetPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) {
+ // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the
+ // operand in place.
+ return SDValue(
+ DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Only know how to promote the condition!");
+ SDValue Cond = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ if (N->getOpcode() == ISD::VSELECT)
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ return Res;
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
+ Cond = PromoteTargetBoolean(Cond, OpVT);
+
+ return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+ // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+ // The CC (#2) is always legal.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ SDLoc dl(N);
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+ Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+ SDLoc dl(N);
+
+ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
+
+ // Truncate the value and store the result.
+ return DAG.getTruncStore(Ch, dl, Val, Ptr,
+ N->getMemoryVT(), N->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
+ SDValue DataOp = N->getValue();
+ EVT DataVT = DataOp.getValueType();
+ SDValue Mask = N->getMask();
+ SDLoc dl(N);
+
+ bool TruncateStore = false;
+ if (OpNo == 3) {
+ Mask = PromoteTargetBoolean(Mask, DataVT);
+ // Update in place.
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[3] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ } else { // Data operand
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+ DataOp = GetPromotedInteger(DataOp);
+ TruncateStore = true;
+ }
+
+ return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
+ N->getMemoryVT(), N->getMemOperand(),
+ TruncateStore, N->isCompressingStore());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 2 && "Only know how to promote the mask!");
+ EVT DataVT = N->getValueType(0);
+ SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[OpNo] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
+ unsigned OpNo) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValueType(0);
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else if (OpNo == 4) {
+ // The Index
+ if (N->isIndexSigned())
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ else
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
+ unsigned OpNo) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValue().getValueType();
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else if (OpNo == 4) {
+ // The Index
+ if (N->isIndexSigned())
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ else
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getZeroExtendInReg(Op, dl,
+ N->getOperand(0).getValueType().getScalarType());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDLoc DL(N);
+
+ Carry = PromoteTargetBoolean(Carry, LHS.getValueType());
+
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) {
+ SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
+ return SDValue(
+ DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
+ // Promote the RETURNADDR/FRAMEADDR argument to a supported integer width.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1 && "Don't know how to promote this operand!");
+ // Promote the rw, locality, and cache type arguments to a supported integer
+ // width.
+ SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
+ SDValue Op3 = ZExtPromotedInteger(N->getOperand(3));
+ SDValue Op4 = ZExtPromotedInteger(N->getOperand(4));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+ Op2, Op3, Op4),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
+ SDValue Op = SExtPromotedInteger(N->getOperand(1));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Expected integer vector reduction");
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ Op = GetPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+
+ EVT EltVT = Op.getValueType().getVectorElementType();
+ EVT VT = N->getValueType(0);
+ if (VT.bitsGE(EltVT))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
+
+  // Result size must be >= element size. If this is not the case after
+  // promotion, compute the reduction in the promoted element type and then
+  // truncate to the result type.
+ SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to expand the result of this "
+ "operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+ case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+ case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
+ case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+ case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LLROUND: ExpandIntRes_LLROUND(N, Lo, Hi); break;
+ case ISD::LLRINT: ExpandIntRes_LLRINT(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+ case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
+ case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+ case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+ case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+ case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_CMP_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other);
+ SDValue Tmp = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs,
+ N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ AN->getMemOperand());
+
+ // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine
+    // success simply by comparing the loaded value against the incoming
+    // comparison value.
+ SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp,
+ N->getOperand(2), ISD::SETEQ);
+
+ SplitInteger(Tmp, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Success);
+ ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1));
+ break;
+ }
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+ case ISD::UMAX:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+ case ISD::ADDC:
+ case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
+ case ISD::UADDO:
+ case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
+
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// Lower an atomic node to the appropriate builtin call.
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
+/// N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc DL(N);
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // Though Amt shouldn't usually be 0, it's possible. E.g. when legalization
+  // split a vector shift, like this: <op1, op2> SHL <0, 2>.
+ if (!Amt) {
+ Lo = InL;
+ Hi = InH;
+ return;
+ }
+
+ EVT NVT = InL.getValueType();
+ unsigned VTBits = N->getValueType(0).getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ EVT ShTy = N->getOperand(1).getValueType();
+
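+  // E.g. expanding an i64 SHL by a constant 40 into i32 parts gives Lo = 0
+  // and Hi = InL << 8: the amount exceeds the 32-bit part width by 8, so all
+  // surviving bits come from the low input part.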
+ if (N->getOpcode() == ISD::SHL) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = DAG.getConstant(0, DL, NVT);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = DAG.getConstant(0, DL, NVT);
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = DAG.getConstant(0, DL, NVT);
+ Hi = InL;
+ } else {
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(Amt, DL, ShTy)),
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::SRL) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = DAG.getConstant(0, DL, NVT);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = DAG.getNode(ISD::SRL, DL,
+ NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ Hi = DAG.getConstant(0, DL, NVT);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, DL, NVT);
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, DL, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
+ }
+ return;
+ }
+
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ if (Amt.ugt(VTBits)) {
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, DL, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
+ }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bit of the shift amount. If we
+/// can tell this, we know that the amount is >= NVTBits or < NVTBits (e.g.
+/// >= 32 or < 32 when expanding into 32-bit parts), without knowing the
+/// actual shift amount.
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned ShBits = ShTy.getScalarSizeInBits();
+ unsigned NVTBits = NVT.getScalarSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ SDLoc dl(N);
+
+ APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
+ KnownBits Known = DAG.computeKnownBits(N->getOperand(1));
+
+ // If we don't know anything about the high bits, exit.
+ if (((Known.Zero|Known.One) & HighBitMask) == 0)
+ return false;
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // If we know that any of the high bits of the shift amount are one, then we
+ // can do this as a couple of simple shifts.
+ if (Known.One.intersects(HighBitMask)) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+ DAG.getConstant(~HighBitMask, dl, ShTy));
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, dl, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+ // If we know that all of the high bits of the shift amount are zero, then we
+ // can do this as a couple of simple shifts.
+ if (HighBitMask.isSubsetOf(Known.Zero)) {
+    // Calculate NVTBits-1 - x (e.g. 31-x for 32-bit parts). NVTBits-1 is used
+    // instead of NVTBits to avoid creating an undefined shift if x is zero. We
+    // can use XOR here because x is known to be smaller than NVTBits.
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
+
+ unsigned Op1, Op2;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+ case ISD::SRL:
+ case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+ }
+
+ // When shifting right the arithmetic for Lo and Hi is swapped.
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(InL, InH);
+
+ // Use a little trick to get the bits that move from Lo to Hi. First
+ // shift by one bit.
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy));
+ // Then compute the remaining shift with amount-1.
+ SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
+
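+    // E.g. for SHL with 32-bit parts: Amt2 == 31 - Amt, so Sh2 equals
+    // InL >> (32 - Amt) when Amt > 0 and 0 when Amt == 0, without ever
+    // creating a full 32-bit shift.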
+ Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
+
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(Hi, Lo);
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ SDLoc dl(N);
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy);
+ SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+ SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
+ SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy),
+ Amt, DAG.getConstant(0, dl, ShTy),
+ ISD::SETEQ);
+
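+  // The "short" (Amt < NVTBits) and "long" (Amt >= NVTBits) expansions are
+  // computed below and selected on isShort; the isZero select additionally
+  // guards Amt == 0, where the AmtLack shift amount would be a full NVTBits
+  // (see the FIXME below).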
+ SDValue LoS, HiS, LoL, HiL;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // Short: ShAmt < NVTBits
+ LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+ HiS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+ DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero.
+ HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
+
+ Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getSelect(dl, NVT, isZero, InH,
+ DAG.getSelect(dl, NVT, isShort, HiS, HiL));
+ return true;
+ case ISD::SRL:
+    // Short: ShAmt < NVTBits
+    HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+    // FIXME: If Amt is zero, the following shift generates an undefined result
+    // on some architectures.
+    LoS = DAG.getNode(ISD::OR, dl, NVT,
+                      DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+                      DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero.
+ LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getSelect(dl, NVT, isZero, InL,
+ DAG.getSelect(dl, NVT, isShort, LoS, LoL));
+ Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRA:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
+ LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getSelect(dl, NVT, isZero, InL,
+ DAG.getSelect(dl, NVT, isShort, LoS, LoL));
+ Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
+ return true;
+ }
+}
+
+static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) {
+ switch (Op) {
+ default: llvm_unreachable("invalid min/max opcode");
+ case ISD::SMAX:
+ return std::make_pair(ISD::SETGT, ISD::UMAX);
+ case ISD::UMAX:
+ return std::make_pair(ISD::SETUGT, ISD::UMAX);
+ case ISD::SMIN:
+ return std::make_pair(ISD::SETLT, ISD::UMIN);
+ case ISD::UMIN:
+ return std::make_pair(ISD::SETULT, ISD::UMIN);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc DL(N);
+ ISD::NodeType LoOpc;
+ ISD::CondCode CondC;
+ std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
+
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ // Value types
+ EVT NVT = LHSL.getValueType();
+ EVT CCT = getSetCCResultType(NVT);
+
+ // Hi part is always the same op
+ Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
+
+  // We need to know whether to select the Lo part that corresponds to the
+  // 'winning' Hi part, or whether the Hi parts were equal.
+ SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
+ SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
+
+ // Lo part corresponding to the 'winning' Hi part
+ SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
+
+  // Lo part to use if the Hi parts are equal; this uses the unsigned version.
+ SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
+
+ Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ EVT NVT = LHSL.getValueType();
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ bool HasOpCarry = TLI.isOperationLegalOrCustom(
+ N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (HasOpCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
+ } else {
+ Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
+ }
+ return;
+ }
+
+ // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+ // them. TODO: Teach operation legalization how to expand unsupported
+ // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
+ // generate a value of this type in the expanded code sequence.
+ bool hasCarry =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::ADDC : ISD::SUBC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+
+ if (hasCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
+ }
+ return;
+ }
+
+ bool hasOVF =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::UADDO : ISD::USUBO,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
+
+ if (hasOVF) {
+ EVT OvfVT = getSetCCResultType(NVT);
+ SDVTList VTList = DAG.getVTList(NVT, OvfVT);
+ int RevOpc;
+ if (N->getOpcode() == ISD::ADD) {
+ RevOpc = ISD::SUB;
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
+ } else {
+ RevOpc = ISD::ADD;
+ Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
+ }
+ SDValue OVF = Lo.getValue(1);
+
+ switch (BoolType) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF);
+ LLVM_FALLTHROUGH;
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ OVF = DAG.getZExtOrTrunc(OVF, dl, NVT);
+ Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ OVF = DAG.getSExtOrTrunc(OVF, dl, NVT);
+ Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF);
+ }
+ return;
+ }
+
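+  // Neither carry ops nor overflow ops are available. Expand to plain
+  // ADD/SUB on the halves and recover the carry/borrow with unsigned
+  // comparisons: the low half of an addition carried iff the low result is
+  // less than either original low operand.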
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
+ SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
+ SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
+ return;
+ }
+
+ SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
+ DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2,
+ DAG.getConstant(1, dl, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ } else {
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
+ SDValue Cmp =
+ DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+
+ SDValue Borrow;
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
+ Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT);
+ else
+ Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
+
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ if (N->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
+ }
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDLoc dl(N);
+
+ SDValue Ovf;
+
+ bool HasOpCarry = TLI.isOperationLegalOrCustom(
+      N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY,
+ TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
+
+ if (HasOpCarry) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
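+    // The low half keeps the UADDO/USUBO opcode, since it produces the first
+    // carry; only the high half needs the carry-consuming form.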
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(Opc, dl, VTList, HiOps);
+
+ Ovf = Hi.getValue(1);
+ } else {
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Calculate the overflow: addition overflows iff a + b < a, and subtraction
+ // overflows iff a - b > a.
+ auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
+ Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
+ }
+
+  // Legalize the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ovf);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH, SDValue() };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
+
+  // Legalize the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is any extension of the input (which degenerates to a copy).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+ Hi = DAG.getUNDEF(NVT); // The high part is undefined.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part replicates the sign bit of Lo, make it explicit.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NVTBits - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part must be zero, make it explicit.
+ Hi = DAG.getConstant(0, dl, NVT);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+ auto Constant = cast<ConstantSDNode>(N);
+ const APInt &Cst = Constant->getAPIntValue();
+ bool IsTarget = Constant->isTargetOpcode();
+ bool IsOpaque = Constant->isOpaque();
+ SDLoc dl(N);
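+  // For example, splitting the i64 constant 0x123456789ABCDEF0 into i32
+  // halves gives Lo = 0x9ABCDEF0 and Hi = 0x12345678.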
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget,
+ IsOpaque);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+
+ // abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(0, dl, VT), N0);
+ SDValue NegLo, NegHi;
+ SplitInteger(Neg, NegLo, NegHi);
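+  // The negation is built on the full (illegal) type and then re-split, so
+  // the borrow from the low half into the high half is handled when the SUB
+  // itself is expanded.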
+
+ GetExpandedInteger(N0, Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
+ DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
+ Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
+ Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
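+  // Using CTLZ_ZERO_UNDEF for the high half is safe because its result is
+  // only selected when Hi is known to be non-zero; the low half keeps N's
+  // opcode so an all-zero input retains the original node's semantics.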
+
+ Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ NVT)));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
+
+ Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ NVT)));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+ Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT);
+ // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue Op = N->getOperand(0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue Op = N->getOperand(0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ if (VT == MVT::f32)
+ LC = RTLIB::LLROUND_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLROUND_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLROUND_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLROUND_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLROUND_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
+
+ SDValue Op = N->getOperand(0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ SDLoc dl(N);
+ EVT RetVT = N->getValueType(0);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ if (VT == MVT::f32)
+ LC = RTLIB::LLRINT_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLRINT_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLRINT_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLRINT_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLRINT_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
+
+ SDValue Op = N->getOperand(0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ SDLoc dl(N);
+ EVT RetVT = N->getValueType(0);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ unsigned Alignment = N->getAlignment();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = N->getAAInfo();
+ SDLoc dl(N);
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ EVT MemVT = N->getMemoryVT();
+
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT,
+ Alignment, MMOFlags, AAInfo);
+
+ // Remember the chain.
+ Ch = Lo.getValue(1);
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // lo part.
+ unsigned LoSize = Lo.getValueSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, dl, NVT);
+ } else {
+ assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+ // The high part is undefined.
+ Hi = DAG.getUNDEF(NVT);
+ }
+ } else if (DAG.getDataLayout().isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
+ AAInfo);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned loads at
+ // the cost of some bit-fiddling.
+ EVT MemVT = N->getMemoryVT();
+ unsigned EBytes = MemVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
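+    // For example, a big-endian i56 extending load split into i32 parts has
+    // EBytes = 7, IncrementSize = 4 and ExcessBits = 24: Hi loads the top 32
+    // bits from Ptr, Lo zero-extend loads the low 24 bits from Ptr+4, and
+    // the fix-up below moves the low 8 bits of Hi into the top byte of Lo
+    // before realigning Hi.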
+
+ // Load both the high bits and maybe some of the low bits.
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ MemVT.getSizeInBits() - ExcessBits),
+ Alignment, MMOFlags, AAInfo);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ // Load the rest of the low bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer low bits from the bottom of Hi to the top of Lo.
+ Lo = DAG.getNode(
+ ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout()))));
+ // Move high bits to the right position in Hi.
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT,
+ Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ }
+ }
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+
+ if (TLI.expandMUL(N, Lo, Hi, NVT, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
+ LL, LH, RL, RH))
+ return;
+
+ // If nothing else, we can make a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ // We'll expand the multiplication by brute force because we have no other
+ // options. This is a trivially-generalized version of the code from
+ // Hacker's Delight (itself derived from Knuth's Algorithm M from section
+ // 4.3.1).
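+    // Writing the low halves in half-words, LL = LLH*2^h + LLL and
+    // RL = RLH*2^h + RLL (where h = HalfBits), the low product is
+    //   LL*RL = LLL*RLL + (LLH*RLL + LLL*RLH)*2^h + LLH*RLH*2^2h
+    // T, U, V and W below accumulate these partial products together with
+    // their carries; the cross products LL*RH and LH*RL only affect Hi.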
+ unsigned Bits = NVT.getSizeInBits();
+ unsigned HalfBits = Bits >> 1;
+ SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
+ NVT);
+ SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
+ SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
+
+ SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
+ SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
+
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+ if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {
+ // The type from TLI is too small to fit the shift amount we want.
+ // Override it with i32. The shift will have to be legalized.
+ ShiftAmtTy = MVT::i32;
+ }
+ SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
+ SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
+ SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
+ SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
+
+ SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
+ SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
+ SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
+
+ SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
+ SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
+
+ SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
+ DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
+ DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
+
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
+ DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
+ DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
+ return;
+ }
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc DL(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
+ SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0));
+ Lo = R.getValue(0);
+ Hi = R.getValue(1);
+ ReplaceValueWith(SDValue(N, 1), R.getValue(2));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Result = TLI.expandAddSubSat(N, DAG);
+ SplitInteger(Result, Lo, Hi);
+}
+
+/// This performs an expansion of the integer result for a fixed point
+/// multiplication. The default expansion performs rounding down towards
+/// negative infinity, though targets that do care about rounding should specify
+/// a target hook for rounding and provide their own expansion or lowering of
+/// fixed point multiplication to be consistent with rounding.
+void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ unsigned VTSize = VT.getScalarSizeInBits();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ uint64_t Scale = N->getConstantOperandVal(2);
+ bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT ||
+ N->getOpcode() == ISD::UMULFIXSAT);
+ bool Signed = (N->getOpcode() == ISD::SMULFIX ||
+ N->getOpcode() == ISD::SMULFIXSAT);
+
+ // Handle special case when scale is equal to zero.
+ if (!Scale) {
+ SDValue Result;
+ if (!Saturating) {
+ Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else {
+ EVT BoolVT = getSetCCResultType(VT);
+ unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO;
+ Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ if (Signed) {
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ } else {
+ // For unsigned multiplication, we only need to check the max since we
+ // can't really overflow towards zero.
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product);
+ }
+ }
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
+  // For SMULFIX[SAT] we only expect to find Scale < VTSize, but this assert
+  // also covers the unhandled cases below, while still being valid for
+  // UMULFIX[SAT].
+ assert(Scale <= VTSize && "Scale can't be larger than the value type size.");
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(LHS, LL, LH);
+ GetExpandedInteger(RHS, RL, RH);
+ SmallVector<SDValue, 4> Result;
+
+ unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
+ LL, LH, RL, RH)) {
+ report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
+ return;
+ }
+
+ unsigned NVTSize = NVT.getScalarSizeInBits();
+ assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
+ "the size of the current value type");
+ EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+
+ // After getting the multiplication result in 4 parts, we need to perform a
+ // shift right by the amount of the scale to get the result in that scale.
+ //
+  // Let's say we multiply two 64-bit numbers. The resulting value can be held in
+ // 128 bits that are cut into 4 32-bit parts:
+ //
+ // HH HL LH LL
+ // |---32---|---32---|---32---|---32---|
+ // 128 96 64 32 0
+ //
+ // |------VTSize-----|
+ //
+ // |NVTSize-|
+ //
+ // The resulting Lo and Hi would normally be in LL and LH after the shift. But
+  // to avoid unnecessary shifting of all 4 parts, we can adjust the shift
+ // amount and get Lo and Hi using two funnel shifts. Or for the special case
+ // when Scale is a multiple of NVTSize we can just pick the result without
+ // shifting.
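+  // For example, with NVTSize = 32 and Scale = 20, Part0 = 0 and the funnel
+  // shifts produce Lo = fshr(Result[1], Result[0], 20) and
+  // Hi = fshr(Result[2], Result[1], 20); with Scale = 32, Part0 = 1 and we
+  // simply take Lo = Result[1] and Hi = Result[2].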
+ uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
+ if (Scale % NVTSize) {
+ SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
+ Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
+ ShiftAmount);
+ Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
+ ShiftAmount);
+ } else {
+ Lo = Result[Part0];
+ Hi = Result[Part0 + 1];
+ }
+
+  // Unless saturation is requested, we are done; the result is in <Hi,Lo>.
+ if (!Saturating)
+ return;
+
+  // Cannot overflow when there is no integer part.
+ if (Scale == VTSize)
+ return;
+
+ // To handle saturation we must check for overflow in the multiplication.
+ //
+ // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result)
+ // aren't all zeroes.
+ //
+ // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result)
+ // aren't all ones or all zeroes.
+ //
+ // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
+ // highest bit of HH determines saturation direction in the event of signed
+ // saturation.
+
+ SDValue ResultHL = Result[2];
+ SDValue ResultHH = Result[3];
+
+ SDValue SatMax, SatMin;
+ SDValue NVTZero = DAG.getConstant(0, dl, NVT);
+ SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
+ EVT BoolNVT = getSetCCResultType(NVT);
+
+ if (!Signed) {
+ if (Scale < NVTSize) {
+ // Overflow happened if ((HH | (HL >> Scale)) != 0).
+ SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getConstant(Scale, dl, ShiftTy));
+ SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
+ SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
+ } else if (Scale == NVTSize) {
+ // Overflow happened if (HH != 0).
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
+ } else if (Scale < VTSize) {
+ // Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
+ SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getConstant(Scale - NVTSize, dl,
+ ShiftTy));
+ SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
+ } else
+ llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT"
+ "(and saturation can't happen with Scale==VTSize).");
+
+ Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo);
+ return;
+ }
+
+ if (Scale < NVTSize) {
+    // The number of overflow bits we can check is VTSize - Scale + 1 (we
+ // include the sign bit). If these top bits are > 0, then we overflowed past
+ // the max value. If these top bits are < -1, then we overflowed past the
+ // min value. Otherwise, we did not overflow.
+ unsigned OverflowBits = VTSize - Scale + 1;
+ assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
+ "Extent of overflow bits must start within HL");
+ SDValue HLHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
+ SDValue HLLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
+ // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask).
+ SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT));
+ // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask).
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT));
+ } else if (Scale == NVTSize) {
+ // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1).
+ SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg));
+ // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0).
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos));
+ } else if (Scale < VTSize) {
+ // This is similar to the case when we saturate if Scale < NVTSize, but we
+ // only need to check HH.
+ unsigned OverflowBits = VTSize - Scale + 1;
+ SDValue HHHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
+ SDValue HHLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
+ SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
+ } else
+ llvm_unreachable("Illegal scale for signed fixed point mul.");
+
+ // Saturate to signed maximum.
+ APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
+ APInt MaxLo = APInt::getAllOnesValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
+ // Saturate to signed minimum.
+ APInt MinHi = APInt::getSignedMinValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDLoc dl(Node);
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(Node, 1), Cmp);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(0), Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // If we can emit an efficient shift operation, do so now. Check to see if
+ // the RHS is a constant.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return ExpandShiftByConstant(N, CN->getAPIntValue(), Lo, Hi);
+
+ // If we can determine that the high bit of the shift is zero or one, even if
+ // the low bits are variable, emit this shift in an optimized form.
+ if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+ return;
+
+ // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+ unsigned PartsOpc;
+ if (N->getOpcode() == ISD::SHL) {
+ PartsOpc = ISD::SHL_PARTS;
+ } else if (N->getOpcode() == ISD::SRL) {
+ PartsOpc = ISD::SRL_PARTS;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ PartsOpc = ISD::SRA_PARTS;
+ }
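+
+  // A *_PARTS node takes both halves plus the shift amount and produces both
+  // result halves at once; for example, an i64 shl on a 32-bit target becomes
+  //   lo, hi = shl_parts a.lo, a.hi, amt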
+
+ // Next check to see if the target supports this SHL_PARTS operation or if it
+  // will custom expand it. Don't lower this to SHL_PARTS when we optimize for
+ // size, but create a libcall instead.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+ const bool LegalOrCustom =
+ (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom;
+
+ if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ EVT VT = LHSL.getValueType();
+
+ // If the shift amount operand is coming from a vector legalization it may
+ // have an illegal type. Fix that first by casting the operand, otherwise
+ // the new SHL_PARTS operation would need further legalization.
+ SDValue ShiftOp = N->getOperand(1);
+ EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ assert(ShiftTy.getScalarSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarSizeInBits()) &&
+ "ShiftAmountTy is too small to cover the range of this type!");
+ if (ShiftOp.getValueType() != ShiftTy)
+ ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
+
+ SDValue Ops[] = { LHSL, LHSH, ShiftOp };
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops);
+ Hi = Lo.getValue(1);
+ return;
+ }
+
+ // Otherwise, emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ bool isSigned;
+ if (N->getOpcode() == ISD::SHL) {
+ isSigned = false; /*sign irrelevant*/
+ if (VT == MVT::i16)
+ LC = RTLIB::SHL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SHL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SHL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SHL_I128;
+ } else if (N->getOpcode() == ISD::SRL) {
+ isSigned = false;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRL_I128;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ isSigned = true;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRA_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRA_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRA_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRA_I128;
+ }
+
+ if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+ return;
+ }
+
+ if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+ llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is sign extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+ // The high part is obtained by SRA'ing all but one of the bits of low part.
+ unsigned LoSize = NVT.getSizeInBits();
+ Hi = DAG.getNode(
+ ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout())));
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ if (EVT.bitsLE(Lo.getValueType())) {
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+ N->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+ DAG.getConstant(Hi.getValueSizeInBits() - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ } else {
+ // For example, extension of an i48 to an i64. Leave the low part alone,
+ // sext_inreg the high part.
+ unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(1), Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+ Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0),
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ if (N->getOpcode() == ISD::UMULO) {
+ // This section expands the operation into the following sequence of
+ // instructions. `iNh` here refers to a type which has half the bit width of
+ // the type the original operation operated on.
+ //
+ // %0 = %LHS.HI != 0 && %RHS.HI != 0
+ // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
+ // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
+ // %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN)
+ // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh
+ // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 )
+ //
+ // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 }
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
+ SplitInteger(LHS, LHSLow, LHSHigh);
+ SplitInteger(RHS, RHSLow, RHSHigh);
+    EVT HalfVT = LHSLow.getValueType();
+    EVT BitVT = N->getValueType(1);
+ SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT);
+ SDVTList VTFullAddO = DAG.getVTList(VT, BitVT);
+
+ SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
+ SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
+ DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
+ DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
+
+ SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
+ SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+ One.getValue(0));
+
+ SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
+ SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+ Two.getValue(0));
+
+ // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
+    // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn't this
+ // operation recursively legalized?).
+ //
+ // Many backends understand this pattern and will convert into LOHI
+ // themselves, if applicable.
+ SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
+ SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh);
+ SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1));
+ SplitInteger(Five, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
+ Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+
+ // Replace this with a libcall that will check overflow.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::MULO_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MULO_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MULO_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+ // Temporary for the overflow value, default it to zero.
+ SDValue Chain =
+ DAG.getStore(DAG.getEntryNode(), dl, DAG.getConstant(0, dl, PtrVT), Temp,
+ MachinePointerInfo());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : N->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = true;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the overflow check.
+ Entry.Node = Temp;
+ Entry.Ty = PtrTy->getPointerTo();
+ Entry.IsSExt = true;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
+ .setSExtResult();
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ SplitInteger(CallInfo.first, Lo, Hi);
+ SDValue Temp2 =
+ DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo());
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+ DAG.getConstant(0, dl, PtrVT),
+ ISD::SETNE);
+ // Use the overflow from the libcall everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(0), Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(1), Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is zero extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+ Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getZeroExtendInReg(Hi, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
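+  // Emulate the wide atomic load with a compare-and-swap of zero with zero:
+  // the exchange either harmlessly stores 0 over an existing 0 or fails, and
+  // in both cases the returned old value is an atomic read of the memory.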
+ EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
+ SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
+ N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand());
+
+ ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // TODO For VECREDUCE_(AND|OR|XOR) we could split the vector and calculate
+ // both halves independently.
+ SDValue Res = TLI.expandVecReduce(N, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ report_fatal_error("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
+ case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ case ISD::RETURNADDR:
+ case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+
+ case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+ GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+ if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+ if (RHSLo == RHSHi) {
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+ if (RHSCST->isAllOnesValue()) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl,
+ LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
+ }
+ }
+ }
+
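+    // In the general case, (in)equality of the pair reduces to one compare:
+    //   setcc(LHS, RHS, eq/ne)
+    //     -> setcc((LHSLo ^ RHSLo) | (LHSHi ^ RHSHi), 0, eq/ne)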
+ NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+ NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+ NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType());
+ return;
+ }
+
+ // If this is a comparison of the sign bit, just look at the top part.
+  // X > -1, X < 0
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+ if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ NewLHS = LHSHi;
+ NewRHS = RHSHi;
+ return;
+ }
+
+ // FIXME: This generated code sucks.
+ ISD::CondCode LowCC;
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // LoCmp = lo(op1) < lo(op2) // Always unsigned comparison
+ // HiCmp = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? LoCmp : HiCmp;
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+  // this identity: (B1 ? B2 : B3) --> (B1 & B2) | (!B1 & B3)
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true,
+ nullptr);
+ SDValue LoCmp, HiCmp;
+ if (TLI.isTypeLegal(LHSLo.getValueType()) &&
+ TLI.isTypeLegal(RHSLo.getValueType()))
+ LoCmp = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!LoCmp.getNode())
+ LoCmp = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, LowCC);
+ if (TLI.isTypeLegal(LHSHi.getValueType()) &&
+ TLI.isTypeLegal(RHSHi.getValueType()))
+ HiCmp = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi,
+ RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!HiCmp.getNode())
+ HiCmp =
+ DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode());
+ ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode());
+
+ bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
+
+ if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) ||
+ (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) ||
+ (LoCmpC && LoCmpC->isNullValue())))) {
+ // For LE / GE, if high part is known false, ignore the low part.
+    // For LT / GT: if low part is known false, return the high part;
+    //              if high part is known true, ignore the low part.
+ NewLHS = HiCmp;
+ NewRHS = SDValue();
+ return;
+ }
+
+ if (LHSHi == RHSHi) {
+ // Comparing the low bits is enough.
+ NewLHS = LoCmp;
+ NewRHS = SDValue();
+ return;
+ }
+
+ // Lower with SETCCCARRY if the target supports it.
+ EVT HiVT = LHSHi.getValueType();
+ EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT);
+ bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT);
+
+ // FIXME: Make all targets support this, then remove the other lowering.
+ if (HasSETCCCARRY) {
+ // SETCCCARRY can detect < and >= directly. For > and <=, flip
+ // operands and condition code.
+ bool FlipOperands = false;
+ switch (CCCode) {
+ case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break;
+ case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break;
+ case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break;
+ case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break;
+ default: break;
+ }
+ if (FlipOperands) {
+ std::swap(LHSLo, RHSLo);
+ std::swap(LHSHi, RHSHi);
+ }
+ // Perform a wide subtraction, feeding the carry from the low part into
+ // SETCCCARRY. The SETCCCARRY operation is essentially looking at the high
+ // part of the result of LHS - RHS. It is negative iff LHS < RHS. It is
+ // zero or positive iff LHS >= RHS.
+ EVT LoVT = LHSLo.getValueType();
+ SDVTList VTList = DAG.getVTList(LoVT, getSetCCResultType(LoVT));
+ SDValue LowCmp = DAG.getNode(ISD::USUBO, dl, VTList, LHSLo, RHSLo);
+ SDValue Res = DAG.getNode(ISD::SETCCCARRY, dl, getSetCCResultType(HiVT),
+ LHSHi, RHSHi, LowCmp.getValue(1),
+ DAG.getCondCode(CCCode));
+ NewLHS = Res;
+ NewRHS = SDValue();
+ return;
+ }
+
+ NewLHS = TLI.SimplifySetCC(getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ,
+ false, DagCombineInfo, dl);
+ if (!NewLHS.getNode())
+ NewLHS =
+ DAG.getSetCC(dl, getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), NewLHS, LoCmp, HiCmp);
+ NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+  // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+  // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+  // If IntegerExpandSetCCOperands returned a scalar, use it.
+ if (!NewRHS.getNode()) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(
+ DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+ SDLoc dl = SDLoc(N);
+
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(LHS, LHSLo, LHSHi);
+ GetExpandedInteger(RHS, RHSLo, RHSHi);
+
+  // Expand to a SUBCARRY for the low part and a SETCCCARRY for the high.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType());
+ SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry);
+ return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi,
+ LowCmp.getValue(1), Cond);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+ // The value being shifted is legal, but the shift amount is too big.
+ // It follows that either the result of the shift is undefined, or the
+ // upper half of the shift amount is zero. Just use the lower half.
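+  // For example, an i32 shift whose amount was expanded from i64: any amount
+  // with a nonzero upper half is at least 2^32 and therefore already
+  // undefined, so the lower i32 half is all that matters.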
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(1), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant,
+  // which is wider than any legal integer type on 8/16-bit targets. Just
+  // truncate the constant to a valid type.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this SINT_TO_FP!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+
+ EVT VT = N->getOperand(1).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ unsigned Alignment = N->getAlignment();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = N->getAAInfo();
+ SDLoc dl(N);
+ SDValue Lo, Hi;
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(), Alignment, MMOFlags, AAInfo);
+ }
+
+ if (DAG.getDataLayout().isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
+ AAInfo);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Hi = DAG.getTruncStore(
+ Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
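+  // Worked example (for illustration): a big-endian truncating store of an
+  // i56 value with NVT = i32 gives EBytes = 7, IncrementSize = 4,
+  // ExcessBits = 24 and HiVT = i32; bits 55..24 are stored first, then
+  // bits 23..0 as an i24 at offset 4.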
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(
+ ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout()))));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment,
+ MMOFlags, AAInfo);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Just truncate the low part of the source.
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ SDLoc dl(N);
+
+  // The following optimization is valid only if every value in SrcVT (when
+  // treated as signed) is representable in DstVT. Check that the mantissa
+  // size of DstVT is at least the number of bits in SrcVT minus one.
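+  // For example, expanding i64 -> f128 qualifies (113 bits of mantissa >= 63),
+  // while i64 -> f64 does not (53 < 63).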
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT);
+ if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 &&
+ TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+ // Do a signed conversion then adjust the result.
+ SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+ SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+ // The result of the signed conversion needs adjusting if the 'sign bit' of
+ // the incoming integer was set. To handle this, we dynamically test to see
+ // if it is set, and, if so, add a fudge factor.
+
+ const uint64_t F32TwoE32 = 0x4F800000ULL;
+ const uint64_t F32TwoE64 = 0x5F800000ULL;
+ const uint64_t F32TwoE128 = 0x7F800000ULL;
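+    // F32TwoE32 and F32TwoE64 are the IEEE-754 single-precision bit patterns
+    // of 2^32 and 2^64; 2^128 is not representable in f32, so F32TwoE128 is
+    // the +infinity encoding. The chosen value is the amount to add back when
+    // the source, treated as signed, was really an unsigned value with its
+    // top bit set.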
+
+ APInt FF(32, 0);
+ if (SrcVT == MVT::i32)
+ FF = APInt(32, F32TwoE32);
+ else if (SrcVT == MVT::i64)
+ FF = APInt(32, F32TwoE64);
+ else if (SrcVT == MVT::i128)
+ FF = APInt(32, F32TwoE128);
+ else
+ llvm_unreachable("Unsupported UINT_TO_FP!");
+
+ // Check whether the sign bit is set.
+ SDValue Lo, Hi;
+ GetExpandedInteger(Op, Lo, Hi);
+ SDValue SignSet = DAG.getSetCC(dl,
+ getSetCCResultType(Hi.getValueType()),
+ Hi,
+ DAG.getConstant(0, dl, Hi.getValueType()),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr =
+ DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0, dl);
+ SDValue Four = DAG.getIntPtrConstant(4, dl);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Zero, Four);
+ SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+ FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(),
+ FudgePtr, Offset);
+ Alignment = std::min(Alignment, 4u);
+
+ // Load the value out, extending it from f32 to the destination float type.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ Alignment);
+ return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+ }
+
+ // Otherwise, use a libcall.
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this UINT_TO_FP!");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(),
+ N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ cast<AtomicSDNode>(N)->getMemOperand());
+ return Swap.getValue(1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned OutNumElems = OutVT.getVectorNumElements();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDLoc dl(N);
+ SDValue BaseIdx = N->getOperand(1);
+
+ SDValue InOp0 = N->getOperand(0);
+ if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger)
+ InOp0 = GetPromotedInteger(N->getOperand(0));
+
+ EVT InVT = InOp0.getValueType();
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(OutNumElems);
+ for (unsigned i = 0; i != OutNumElems; ++i) {
+ // Extract the element from the original vector.
+ SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
+ BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType()));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InVT.getVectorElementType(), N->getOperand(0), Index);
+
+ SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem);
+ // Insert the converted element to the new vector.
+ Ops.push_back(Op);
+ }
+
+ return DAG.getBuildVector(NOutVT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements());
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumElems = N->getNumOperands();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(NumElems);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op;
+ // BUILD_VECTOR integer operand types are allowed to be larger than the
+ // result's element type. This may still be true after the promotion. For
+    // example, we might be promoting (v?i1 = BV <i32>, <i32>, ...) to
+    // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>.
+ if (N->getOperand(i).getValueType().bitsLT(NOutVTElem))
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
+ else
+ Op = N->getOperand(i);
+ Ops.push_back(Op);
+ }
+
+ return DAG.getBuildVector(NOutVT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+
+ assert(!N->getOperand(0).getValueType().isVector() &&
+ "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue SplatVal = N->getOperand(0);
+
+ assert(!SplatVal.getValueType().isVector() && "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+ EVT NOutElemVT = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal);
+
+ return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
+ SDLoc dl(N);
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT OutElemTy = NOutVT.getVectorElementType();
+
+ unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
+ unsigned NumOutElem = NOutVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+ assert(NumElem * NumOperands == NumOutElem &&
+ "Unexpected number of elements");
+
+ // Take the elements from the first vector.
+ SmallVector<SDValue, 8> Ops(NumOutElem);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger)
+ Op = GetPromotedInteger(Op);
+ EVT SclrTy = Op.getValueType().getVectorElementType();
+ assert(NumElem == Op.getValueType().getVectorNumElements() &&
+ "Unexpected number of elements");
+
+ for (unsigned j = 0; j < NumElem; ++j) {
+ SDValue Ext = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy);
+ }
+ }
+
+ return DAG.getBuildVector(NOutVT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(NVT.isVector() && "This type must be promoted to a vector type");
+
+ SDLoc dl(N);
+
+ // For operands whose TypeAction is to promote, extend the promoted node
+ // appropriately (ZERO_EXTEND or SIGN_EXTEND) from the original pre-promotion
+ // type, and then construct a new *_EXTEND_VECTOR_INREG node to the promote-to
+  // type.
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Promoted;
+
+ switch(N->getOpcode()) {
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ Promoted = SExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Promoted = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ Promoted = GetPromotedInteger(N->getOperand(0));
+ break;
+ default:
+ llvm_unreachable("Node has unexpected Opcode");
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Promoted);
+ }
+
+ // Directly extend to the appropriate transform-to type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDLoc dl(N);
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+
+ SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+ NOutVTElem, N->getOperand(1));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+ V0, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
+ // The VECREDUCE result size may be larger than the element size, so
+ // we can simply change the result type.
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDLoc dl(N);
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ V0->getValueType(0).getScalarType(), V0, V1);
+
+ // EXTRACT_VECTOR_ELT can return types which are wider than the incoming
+ // element types. If this is the case then we need to expand the outgoing
+ // value and not truncate it.
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDLoc dl(N);
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ MVT InVT = V0.getValueType().getSimpleVT();
+ MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+  SDValue Ext =
+      DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0, N->getOperand(1));
+ return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
+ SDLoc dl(N);
+ unsigned NumElems = N->getNumOperands();
+
+ EVT RetSclrTy = N->getValueType(0).getVectorElementType();
+
+ SmallVector<SDValue, 8> NewOps;
+ NewOps.reserve(NumElems);
+
+ // For each incoming vector
+ for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) {
+ SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
+ EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
+ unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
+
+    for (unsigned i = 0; i < NumElem; ++i) {
+ // Extract element from incoming vector
+ SDValue Ex = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
+ NewOps.push_back(Tr);
+ }
+ }
+
+ return DAG.getBuildVector(N->getValueType(0), dl, NewOps);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 0000000000000..b596c174a2877
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1092 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// Do extensive, expensive sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+ // If a node is not processed, then none of its values should be mapped by any
+ // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+ // If a node is processed, then each value with an illegal type must be mapped
+ // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+ // Values with a legal type may be mapped by ReplacedValues, but not by any of
+ // the other maps.
+
+ // Note that these invariants may not hold momentarily when processing a node:
+ // the node being processed may be put in a map before being marked Processed.
+
+ // Note that it is possible to have nodes marked NewNode in the DAG. This can
+ // occur in two ways. Firstly, a node may be created during legalization but
+ // never passed to the legalization core. This is usually due to the implicit
+ // folding that occurs when using the DAG.getNode operators. Secondly, a new
+ // node may be passed to the legalization core, but when analyzed may morph
+ // into a different node, leaving the original node as a NewNode in the DAG.
+ // A node may morph if one of its operands changes during analysis. Whether
+ // it actually morphs or not depends on whether, after updating its operands,
+ // it is equivalent to an existing node: if so, it morphs into that existing
+ // node (CSE). An operand can change during analysis if the operand is a new
+ // node that morphs, or it is a processed value that was mapped to some other
+ // value (as recorded in ReplacedValues) in which case the operand is turned
+ // into that other value. If a node morphs then the node it morphed into will
+ // be used instead of it for legalization, however the original node continues
+ // to live on in the DAG.
+ // The conclusion is that though there may be nodes marked NewNode in the DAG,
+ // all uses of such nodes are also marked NewNode: the result is a fungus of
+ // NewNodes growing on top of the useful nodes, and perhaps using them, but
+ // not used by them.
+
+ // If a value is mapped by ReplacedValues, then it must have no uses, except
+ // by nodes marked NewNode (see above).
+
+ // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+ // Note that ReplacedValues should be applied iteratively.
+
+ // Note that the ReplacedValues map may also map deleted nodes (by iterating
+ // over the DAG we never dereference deleted nodes). This means that it may
+ // also map nodes marked NewNode if the deallocated memory was reallocated as
+ // another node, and that new node was not seen by the LegalizeTypes machinery
+ // (for example because it was created but not used). In general, we cannot
+ // distinguish between new nodes and deleted nodes.
+ SmallVector<SDNode*, 16> NewNodes;
+ for (SDNode &Node : DAG.allnodes()) {
+ // Remember nodes marked NewNode - they are subject to extra checking below.
+ if (Node.getNodeId() == NewNode)
+ NewNodes.push_back(&Node);
+
+ for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
+ SDValue Res(&Node, i);
+ bool Failed = false;
+ // Don't create a value in map.
+ auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0;
+
+ unsigned Mapped = 0;
+ if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ auto NewValId = ReplacedValues[ResId];
+ auto I = ReplacedValues.find(NewValId);
+ while (I != ReplacedValues.end()) {
+ NewValId = I->second;
+ I = ReplacedValues.find(NewValId);
+ }
+ SDValue NewVal = getSDValue(NewValId);
+ (void)NewVal;
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
+ }
+ if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end())
+ Mapped |= 2;
+ if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end())
+ Mapped |= 4;
+ if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end())
+ Mapped |= 8;
+ if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end())
+ Mapped |= 16;
+ if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end())
+ Mapped |= 32;
+ if (ResId && SplitVectors.find(ResId) != SplitVectors.end())
+ Mapped |= 64;
+ if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end())
+ Mapped |= 128;
+ if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end())
+ Mapped |= 256;
+
+ if (Node.getNodeId() != Processed) {
+ // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+ // marked NewNode too, since a deleted node may have been reallocated as
+ // another node that has not been seen by the LegalizeTypes machinery.
+ if ((Node.getNodeId() == NewNode && Mapped > 1) ||
+ (Node.getNodeId() != NewNode && Mapped != 0)) {
+ dbgs() << "Unprocessed value in a map!";
+ Failed = true;
+ }
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
+ if (Mapped > 1) {
+ dbgs() << "Value with legal type was transformed!";
+ Failed = true;
+ }
+ } else {
+ if (Mapped == 0) {
+ dbgs() << "Processed value not in any map!";
+ Failed = true;
+ } else if (Mapped & (Mapped - 1)) {
+ dbgs() << "Value in multiple maps!";
+ Failed = true;
+ }
+ }
+
+ if (Failed) {
+ if (Mapped & 1)
+ dbgs() << " ReplacedValues";
+ if (Mapped & 2)
+ dbgs() << " PromotedIntegers";
+ if (Mapped & 4)
+ dbgs() << " SoftenedFloats";
+ if (Mapped & 8)
+ dbgs() << " ScalarizedVectors";
+ if (Mapped & 16)
+ dbgs() << " ExpandedIntegers";
+ if (Mapped & 32)
+ dbgs() << " ExpandedFloats";
+ if (Mapped & 64)
+ dbgs() << " SplitVectors";
+ if (Mapped & 128)
+ dbgs() << " WidenedVectors";
+ if (Mapped & 256)
+ dbgs() << " PromotedFloats";
+ dbgs() << "\n";
+ llvm_unreachable(nullptr);
+ }
+ }
+ }
+
+  // Check that NewNodes are only used by other NewNodes.
+ for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+ SDNode *N = NewNodes[i];
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ }
+}
+
+/// This is the main entry point for the type legalizer. This does a top-down
+/// traversal of the dag, legalizing types as it goes. Returns "true" if it made
+/// any changes.
+bool DAGTypeLegalizer::run() {
+ bool Changed = false;
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+ Dummy.setNodeId(Unanalyzed);
+
+ // The root of the dag may dangle to deleted nodes until the type legalizer is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+ // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+ // non-leaves.
+ for (SDNode &Node : DAG.allnodes()) {
+ if (Node.getNumOperands() == 0) {
+ AddToWorklist(&Node);
+ } else {
+ Node.setNodeId(Unanalyzed);
+ }
+ }
+
+ // Now that we have a set of nodes to process, handle them all.
+ while (!Worklist.empty()) {
+#ifndef EXPENSIVE_CHECKS
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+ assert(N->getNodeId() == ReadyToProcess &&
+ "Node should be ready if on worklist!");
+
+ LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG));
+ if (IgnoreNodeResults(N)) {
+ LLVM_DEBUG(dbgs() << "Ignoring node results\n");
+ goto ScanOperands;
+ }
+
+ // Scan the values produced by the node, checking to see if any result
+ // types are illegal.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+ EVT ResultVT = N->getValueType(i);
+ LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT.getEVTString()
+ << "\n");
+ switch (getTypeAction(ResultVT)) {
+ case TargetLowering::TypeLegal:
+ LLVM_DEBUG(dbgs() << "Legal result type\n");
+ break;
+ // The following calls must take care of *all* of the node's results,
+ // not just the illegal result they were passed (this includes results
+ // with a legal type). Results can be remapped using ReplaceValueWith,
+ // or their promoted/expanded/etc values registered in PromotedIntegers,
+ // ExpandedIntegers etc.
+ case TargetLowering::TypePromoteInteger:
+ PromoteIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandInteger:
+ ExpandIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSoftenFloat:
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandFloat:
+ ExpandFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeScalarizeVector:
+ ScalarizeVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSplitVector:
+ SplitVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeWidenVector:
+ WidenVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypePromoteFloat:
+ PromoteFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ }
+ }
+
+ScanOperands:
+ // Scan the operand list for the node, handling any nodes with operands that
+ // are illegal.
+ {
+ unsigned NumOperands = N->getNumOperands();
+ bool NeedsReanalyzing = false;
+ unsigned i;
+ for (i = 0; i != NumOperands; ++i) {
+ if (IgnoreNodeResults(N->getOperand(i).getNode()))
+ continue;
+
+ const auto Op = N->getOperand(i);
+ LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG));
+ EVT OpVT = Op.getValueType();
+ switch (getTypeAction(OpVT)) {
+ case TargetLowering::TypeLegal:
+ LLVM_DEBUG(dbgs() << "Legal operand\n");
+ continue;
+ // The following calls must either replace all of the node's results
+ // using ReplaceValueWith, and return "false"; or update the node's
+ // operands in place, and return "true".
+ case TargetLowering::TypePromoteInteger:
+ NeedsReanalyzing = PromoteIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandInteger:
+ NeedsReanalyzing = ExpandIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ NeedsReanalyzing = SoftenFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandFloat:
+ NeedsReanalyzing = ExpandFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSplitVector:
+ NeedsReanalyzing = SplitVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeWidenVector:
+ NeedsReanalyzing = WidenVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypePromoteFloat:
+ NeedsReanalyzing = PromoteFloatOperand(N, i);
+ Changed = true;
+ break;
+ }
+ break;
+ }
+
+    // The sub-method updated N in place. Check to see if any operands are new,
+    // and if so, mark them. If the node needs revisiting, don't add its users
+    // to the worklist yet.
+ if (NeedsReanalyzing) {
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+
+ N->setNodeId(NewNode);
+ // Recompute the NodeId and correct processed operands, adding the node to
+ // the worklist if ready.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M == N)
+ // The node didn't morph - nothing special to do, it will be revisited.
+ continue;
+
+ // The node morphed - this is equivalent to legalizing by replacing every
+ // value of N with the corresponding value of M. So do that now.
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ // Replacing the value takes care of remapping the new value.
+ ReplaceValueWith(SDValue(N, i), SDValue(M, i));
+ assert(N->getNodeId() == NewNode && "Unexpected node state!");
+ // The node continues to live on as part of the NewNode fungus that
+ // grows on top of the useful nodes. Nothing more needs to be done
+ // with it - move on to the next node.
+ continue;
+ }
+
+ if (i == NumOperands) {
+ LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG);
+ dbgs() << "\n");
+ }
+ }
+NodeDone:
+
+ // If we reach here, the node was processed, potentially creating new nodes.
+ // Mark it as processed and add its users to the worklist as appropriate.
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(Processed);
+
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ int NodeId = User->getNodeId();
+
+ // This node has two options: it can either be a new node or its Node ID
+ // may be a count of the number of operands it has that are not ready.
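+      // For instance, a user with two unprocessed operands gets NodeId 1 when
+      // its first operand finishes and drops to 0 (ReadyToProcess) when the
+      // second one does.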
+ if (NodeId > 0) {
+ User->setNodeId(NodeId-1);
+
+ // If this was the last use it was waiting on, add it to the ready list.
+ if (NodeId-1 == ReadyToProcess)
+ Worklist.push_back(User);
+ continue;
+ }
+
+ // If this is an unreachable new node, then ignore it. If it ever becomes
+ // reachable by being used by a newly created node then it will be handled
+ // by AnalyzeNewNode.
+ if (NodeId == NewNode)
+ continue;
+
+ // Otherwise, this node is new: this is the first operand of it that
+ // became ready. Its new NodeId is the number of operands it has minus 1
+ // (as this node is now processed).
+ assert(NodeId == Unanalyzed && "Unknown node ID!");
+ User->setNodeId(User->getNumOperands() - 1);
+
+ // If the node only has a single operand, it is now ready.
+ if (User->getNumOperands() == 1)
+ Worklist.push_back(User);
+ }
+ }
+
+#ifndef EXPENSIVE_CHECKS
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ // If the root changed (e.g. it was a dead load) update the root.
+ DAG.setRoot(Dummy.getValue());
+
+ // Remove dead nodes. This is important to do for cleanliness but also before
+ // the checking loop below. Implicit folding by the DAG.getNode operators and
+ // node morphing can cause unreachable nodes to be around with their flags set
+ // to new.
+ DAG.RemoveDeadNodes();
+
+ // In a debug build, scan all the nodes to make sure we found them all. This
+ // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+ for (SDNode &Node : DAG.allnodes()) {
+ bool Failed = false;
+
+ // Check that all result types are legal.
+ if (!IgnoreNodeResults(&Node))
+ for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(Node.getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal: ";
+ Node.dump(&DAG);
+ Failed = true;
+ }
+
+ // Check that all operand types are legal.
+ for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(Node.getOperand(i).getNode()) &&
+ !isTypeLegal(Node.getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal: ";
+ Node.getOperand(i).dump(&DAG);
+ Failed = true;
+ }
+
+ if (Node.getNodeId() != Processed) {
+ if (Node.getNodeId() == NewNode)
+ dbgs() << "New node not analyzed?\n";
+ else if (Node.getNodeId() == Unanalyzed)
+ dbgs() << "Unanalyzed node not noticed?\n";
+ else if (Node.getNodeId() > 0)
+ dbgs() << "Operand not processed?\n";
+ else if (Node.getNodeId() == ReadyToProcess)
+ dbgs() << "Not added to worklist?\n";
+ Failed = true;
+ }
+
+ if (Failed) {
+ Node.dump(&DAG); dbgs() << "\n";
+ llvm_unreachable(nullptr);
+ }
+ }
+#endif
+
+ return Changed;
+}
+
+/// The specified node is the root of a subtree of potentially new nodes.
+/// Correct any processed operands (this may change the node) and calculate the
+/// NodeId. If the node itself changes to a processed node, it is not remapped -
+/// the caller needs to take care of this. Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+ // If this was an existing node that is already done, we're done.
+ if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+ return N;
+
+ // Okay, we know that this node is new. Recursively walk all of its operands
+ // to see if they are new also. The depth of this walk is bounded by the size
+ // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+ // about revisiting of nodes.
+ //
+ // As we walk the operands, keep track of the number of nodes that are
+ // processed. If non-zero, this will become the new nodeid of this node.
+ // Operands may morph when they are analyzed. If so, the node will be
+ // updated after all operands have been analyzed. Since this is rare,
+ // the code tries to minimize overhead in the non-morphing case.
+
+ std::vector<SDValue> NewOps;
+ unsigned NumProcessed = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue OrigOp = N->getOperand(i);
+ SDValue Op = OrigOp;
+
+ AnalyzeNewValue(Op); // Op may morph.
+
+ if (Op.getNode()->getNodeId() == Processed)
+ ++NumProcessed;
+
+ if (!NewOps.empty()) {
+ // Some previous operand changed. Add this one to the list.
+ NewOps.push_back(Op);
+ } else if (Op != OrigOp) {
+ // This is the first operand to change - add all operands so far.
+ NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
+ NewOps.push_back(Op);
+ }
+ }
+
+ // Some operands changed - update the node.
+ if (!NewOps.empty()) {
+ SDNode *M = DAG.UpdateNodeOperands(N, NewOps);
+ if (M != N) {
+ // The node morphed into a different node. Normally for this to happen
+ // the original node would have to be marked NewNode. However this can
+ // in theory momentarily not be the case while ReplaceValueWith is doing
+ // its stuff. Mark the original node NewNode to help sanity checking.
+ N->setNodeId(NewNode);
+ if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+ // It morphed into a previously analyzed node - nothing more to do.
+ return M;
+
+ // It morphed into a different new node. Do the equivalent of passing
+ // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+ // to remap the operands, since they are the same as the operands we
+ // remapped above.
+ N = M;
+ }
+ }
+
+ // Calculate the NodeId.
+ N->setNodeId(N->getNumOperands() - NumProcessed);
+ if (N->getNodeId() == ReadyToProcess)
+ Worklist.push_back(N);
+
+ return N;
+}
+
+/// Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+ Val.setNode(AnalyzeNewNode(Val.getNode()));
+ if (Val.getNode()->getNodeId() == Processed)
+ // We were passed a processed node, or it morphed into one - remap it.
+ RemapValue(Val);
+}
+
+/// If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &V) {
+ auto Id = getTableId(V);
+ V = getSDValue(Id);
+}
+
+void DAGTypeLegalizer::RemapId(TableId &Id) {
+ auto I = ReplacedValues.find(Id);
+ if (I != ReplacedValues.end()) {
+ assert(Id != I->second && "Id is mapped to itself.");
+ // Use path compression to speed up future lookups if values get multiply
+ // replaced with other values.
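+    // For example, after recording A->B and then B->C, remapping A rewrites
+    // the entry to A->C so that later lookups resolve in a single step.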
+ RemapId(I->second);
+ Id = I->second;
+
+    // Note that for N = IdToValueMap[Id] it is possible to have
+    // N.getNode()->getNodeId() == NewNode at this point, because a node may be
+    // put in the map before being processed.
+ }
+}
+
+namespace {
+ /// This class is a DAGUpdateListener that listens for updates to nodes and
+ /// recomputes their ready state.
+ class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+ DAGTypeLegalizer &DTL;
+ SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+ public:
+ explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+ SmallSetVector<SDNode*, 16> &nta)
+ : SelectionDAG::DAGUpdateListener(dtl.getDAG()),
+ DTL(dtl), NodesToAnalyze(nta) {}
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ // It is possible, though rare, for the deleted node N to occur as a
+ // target in a map, so note the replacement N -> E in ReplacedValues.
+ assert(E && "Node not replaced?");
+ DTL.NoteDeletion(N, E);
+
+ // In theory the deleted node could also have been scheduled for analysis.
+ // So remove it from the set of nodes which will be analyzed.
+ NodesToAnalyze.remove(N);
+
+ // In general nothing needs to be done for E, since it didn't change but
+ // only gained new uses. However N -> E was just added to ReplacedValues,
+ // and the result of a ReplacedValues mapping is not allowed to be marked
+ // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+ NodesToAnalyze.insert(E);
+ }
+
+ void NodeUpdated(SDNode *N) override {
+      // Node updates can mean pretty much anything. It is possible that an
+      // operand was set to something already processed (for example), in which
+      // case this node could become ready. Recompute its flags.
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ N->setNodeId(DAGTypeLegalizer::NewNode);
+ NodesToAnalyze.insert(N);
+ }
+ };
+} // end anonymous namespace
+
+/// The specified value was legalized to the specified other value.
+/// Update the DAG and NodeIds replacing any uses of From to use To instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+ assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ AnalyzeNewValue(To);
+
+ // Anything that used the old node should now use the new one. Note that this
+ // can potentially cause recursive merging.
+ SmallSetVector<SDNode*, 16> NodesToAnalyze;
+ NodeUpdateListener NUL(*this, NodesToAnalyze);
+ do {
+
+ // The old node may be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ auto FromId = getTableId(From);
+ auto ToId = getTableId(To);
+
+ if (FromId != ToId)
+ ReplacedValues[FromId] = ToId;
+ DAG.ReplaceAllUsesOfValueWith(From, To);
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe
+ // to skip. Note that this is not a morphing node - otherwise it would
+ // still be marked NewNode.
+ continue;
+
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new
+ // node instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ auto OldValId = getTableId(OldVal);
+ auto NewValId = getTableId(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
+ if (OldValId != NewValId)
+ ReplacedValues[OldValId] = NewValId;
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
+ }
+ }
+    // When recursively updating nodes with new nodes, it is possible to have
+    // new uses of From due to CSE. If this happens, replace the new uses of
+    // From with To.
+ } while (!From.use_empty());
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted integer");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
+ Result->setFlags(Op->getFlags());
+
+ DAG.transferDbgValues(Op, Result);
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for softened float");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = SoftenedFloats[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already converted to integer!");
+ OpIdEntry = getTableId(Result);
+}
+
+void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted float");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = PromotedFloats[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+  // Note that in some cases vector operation operands may be wider than the
+  // vector element type. For example, a BUILD_VECTOR of type <1 x i1> may
+  // have a constant i8 operand.
+ assert(Result.getValueSizeInBits() >= Op.getScalarValueSizeInBits() &&
+ "Invalid type for scalarized vector");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = ScalarizedVectors[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already scalarized!");
+ OpIdEntry = getTableId(Result);
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)];
+ assert((Entry.first != 0) && "Operand isn't expanded");
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded integer");
+  // Lo/Hi may have been newly allocated; if so, add NodeIds as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Transfer debug values. Don't invalidate the source debug value until it's
+ // been transferred to the high and low bits.
+ if (DAG.getDataLayout().isBigEndian()) {
+ DAG.transferDbgValues(Op, Hi, 0, Hi.getValueSizeInBits(), false);
+ DAG.transferDbgValues(Op, Lo, Hi.getValueSizeInBits(),
+ Lo.getValueSizeInBits());
+ } else {
+ DAG.transferDbgValues(Op, Lo, 0, Lo.getValueSizeInBits(), false);
+ DAG.transferDbgValues(Op, Hi, Lo.getValueSizeInBits(),
+ Hi.getValueSizeInBits());
+ }
+
+ // Remember that this is the result of the node.
+ std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already expanded");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)];
+ assert((Entry.first != 0) && "Operand isn't expanded");
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded float");
+  // Lo/Hi may have been newly allocated; if so, add NodeIds as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already expanded");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)];
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
+ assert(Lo.getNode() && "Operand isn't split");
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType().getVectorElementType() ==
+ Op.getValueType().getVectorElementType() &&
+ 2*Lo.getValueType().getVectorNumElements() ==
+ Op.getValueType().getVectorNumElements() &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for split vector");
+  // Lo/Hi may have been newly allocated; if so, add NodeIds as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already split");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for widened vector");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = WidenedVectors[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node already widened!");
+ OpIdEntry = getTableId(Result);
+}
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+ unsigned BitWidth = Op.getValueSizeInBits();
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op),
+ EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
+
+/// Convert to a vector of integers of the same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Only applies to vectors!");
+ unsigned EltWidth = Op.getScalarValueSizeInBits();
+ EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+ auto EltCnt = Op.getValueType().getVectorElementCount();
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op),
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, EltCnt), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+ EVT DestVT) {
+ SDLoc dl(Op);
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+ // Emit a store to the stack slot.
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, MachinePointerInfo());
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo());
+}
+
+/// Replace the node's results with custom code provided by the target and
+/// return "true", or do nothing and return "false".
+/// If LegalizeResult is true, the node has illegal result types and VT is the
+/// type of the illegal result number (ResNo). Otherwise the node's result
+/// types are legal but some operand is illegal, and VT is the type of the
+/// illegal operand (OperandNo).
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ if (LegalizeResult)
+ TLI.ReplaceNodeResults(N, Results, DAG);
+ else
+ TLI.LowerOperationWrapper(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom lower it after all.
+ return false;
+
+ // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to
+ // provide the same kind of custom splitting behavior.
+ if (Results.size() == N->getNumValues() + 1 && LegalizeResult) {
+ // We've legalized a return type by splitting it. If there is a chain,
+ // replace that too.
+ SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]);
+ if (N->getNumValues() > 1)
+ ReplaceValueWith(SDValue(N, 1), Results[2]);
+ return true;
+ }
+
+ // Make everything that once used N's values now use those in Results instead.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ }
+ return true;
+}
+
+/// Widen the node's results with custom code provided by the target and return
+/// "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ TLI.ReplaceNodeResults(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom widen lower its result after all.
+ return false;
+
+ // Update the widening map.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ // If this is a chain output just replace it.
+ if (Results[i].getValueType() == MVT::Other)
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ else
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ }
+ return true;
+}
+
+SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ if (i != ResNo)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+ return SDValue(N->getOperand(ResNo));
+}
+
+/// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+/// given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(Pair);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(0, dl));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(1, dl));
+}
+
+/// Build an integer with low bits Lo and high bits Hi.
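+/// For example (a sketch), joining two i32 halves produces
+///   (zext Lo to i64) | ((anyext Hi to i64) << 32).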
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+  // Arbitrarily use dlHi for the result SDLoc.
+ SDLoc dlHi(Hi);
+ SDLoc dlLo(Lo);
+ EVT LVT = Lo.getValueType();
+ EVT HVT = Hi.getValueType();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ LVT.getSizeInBits() + HVT.getSizeInBits());
+
+ EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout(), false);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+ DAG.getConstant(LVT.getSizeInBits(), dlHi, ShiftAmtVT));
+ return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
+
+/// Convert the node into a libcall with the same prototype.
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+ bool isSigned) {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ unsigned NumOps = N->getNumOperands();
+ SDLoc dl(N);
+ if (NumOps == 0) {
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions,
+ dl).first;
+ } else if (NumOps == 1) {
+ SDValue Op = N->getOperand(0);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions,
+ dl).first;
+ } else if (NumOps == 2) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions,
+ dl).first;
+ }
+ SmallVector<SDValue, 8> Ops(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ Ops[i] = N->getOperand(i);
+
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first;
+}
+
+/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that
+/// the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo;
+}
+
+/// Promote the given target boolean to a target boolean of the given type.
+/// A target boolean is an integer value, not necessarily of type i1, the bits
+/// of which conform to getBooleanContents.
+///
+/// ValVT is the type of values that produced the boolean.
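+///
+/// For example, a target reporting ZeroOrOneBooleanContent gets a ZERO_EXTEND
+/// here, while ZeroOrNegativeOneBooleanContent gets a SIGN_EXTEND, preserving
+/// the target's boolean encoding.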
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
+ SDLoc dl(Bool);
+ EVT BoolVT = getSetCCResultType(ValVT);
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT));
+ return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
+}
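+
+// Worked example (illustrative): if getBooleanContents(ValVT) is
+// ZeroOrNegativeOneBooleanContent, getExtendForContent returns
+// ISD::SIGN_EXTEND, so an i1 'true' widens to an all-ones boolean of the
+// setcc result type; ZeroOrOneBooleanContent zero-extends instead.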
+
+/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(Op);
+ assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+ Op.getValueSizeInBits() && "Invalid integer splitting!");
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ unsigned ReqShiftAmountInBits =
+ Log2_32_Ceil(Op.getValueType().getSizeInBits());
+ MVT ShiftAmountTy =
+ TLI.getScalarShiftAmountTy(DAG.getDataLayout(), Op.getValueType());
+ if (ReqShiftAmountInBits > ShiftAmountTy.getSizeInBits())
+ ShiftAmountTy = MVT::getIntegerVT(NextPowerOf2(ReqShiftAmountInBits));
+ Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+ DAG.getConstant(LoVT.getSizeInBits(), dl, ShiftAmountTy));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
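+
+// Worked example (illustrative, derived from the code above): splitting an
+// i64 Op into two i32 halves yields
+//   Lo = trunc i64 Op to i32
+//   Hi = trunc i64 (srl Op, 32) to i32
+// with the shift-amount type widened first only if it cannot represent shift
+// amounts up to Op's bit width.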
+
+/// Return the lower and upper halves of Op's bits in a value type half the
+/// size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ SDValue &Lo, SDValue &Hi) {
+ EVT HalfVT =
+ EVT::getIntegerVT(*DAG.getContext(), Op.getValueSizeInBits() / 2);
+ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// This transforms the SelectionDAG into a SelectionDAG that only uses types
+/// natively supported by the target. Returns "true" if it made any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+ return DAGTypeLegalizer(*this).run();
+}
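+
+// Call-site sketch (assumption, for orientation): SelectionDAGISel drives this
+// from CodeGenAndEmitDAG via CurDAG->LegalizeTypes(), re-running the DAG
+// combiner afterwards when this pass reports changes.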
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 0000000000000..4afbae69128a1
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,962 @@
+//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// shared among the source files that implement the
+// SelectionDAG::LegalizeTypes method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// This takes an arbitrary SelectionDAG as input and hacks on it until only
+/// value types the target machine can handle are left. This involves promoting
+/// small sizes to large sizes or splitting up large values into small values.
+///
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+public:
+ /// This pass uses the NodeId on the SDNodes to hold information about the
+ /// state of the node. The enum has all the values.
+ enum NodeIdFlags {
+ /// All operands have been processed, so this node is ready to be handled.
+ ReadyToProcess = 0,
+
+    /// This is a new node, never before seen, that was created in the process
+    /// of legalizing some other node.
+ NewNode = -1,
+
+ /// This node's ID needs to be set to the number of its unprocessed
+ /// operands.
+ Unanalyzed = -2,
+
+ /// This is a node that has already been processed.
+ Processed = -3
+
+ // 1+ - This is a node which has this many unprocessed operands.
+ };
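+
+  // Illustrative lifecycle (informal summary, not part of the original
+  // comments): existing nodes start Unanalyzed, get an id equal to their
+  // number of unprocessed operands, count down to ReadyToProcess (0), and end
+  // up Processed; nodes created during legalization start out as NewNode.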
+private:
+
+ /// This is a bitvector that contains two bits for each simple value type,
+ /// where the two bits correspond to the LegalizeAction enum from
+ /// TargetLowering. This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// Return how we should legalize values of this type.
+ TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT);
+ }
+
+ /// Return true if this type is legal on this target.
+ bool isTypeLegal(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
+ }
+
+ /// Return true if this is a simple legal type.
+ bool isSimpleLegalType(EVT VT) const {
+ return VT.isSimple() && TLI.isTypeLegal(VT);
+ }
+
+ EVT getSetCCResultType(EVT VT) const {
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ }
+
+ /// Pretend all of this node's results are legal.
+ bool IgnoreNodeResults(SDNode *N) const {
+ return N->getOpcode() == ISD::TargetConstant ||
+ N->getOpcode() == ISD::Register;
+ }
+
+  // Bijection from SDValue to a unique id. As each created node gets a
+  // new id we do not need to worry about expunging ids on reuse. Should we
+  // run out of ids, we can do a one-time expensive compactification.
+ typedef unsigned TableId;
+
+ TableId NextValueId = 1;
+
+ SmallDenseMap<SDValue, TableId, 8> ValueToIdMap;
+ SmallDenseMap<TableId, SDValue, 8> IdToValueMap;
+
+ /// For integer nodes that are below legal width, this map indicates what
+ /// promoted value to use.
+ SmallDenseMap<TableId, TableId, 8> PromotedIntegers;
+
+ /// For integer nodes that need to be expanded this map indicates which
+ /// operands are the expanded version of the input.
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedIntegers;
+
+ /// For floating-point nodes converted to integers of the same size, this map
+ /// indicates the converted value to use.
+ SmallDenseMap<TableId, TableId, 8> SoftenedFloats;
+
+ /// For floating-point nodes that have a smaller precision than the smallest
+ /// supported precision, this map indicates what promoted value to use.
+ SmallDenseMap<TableId, TableId, 8> PromotedFloats;
+
+ /// For float nodes that need to be expanded this map indicates which operands
+ /// are the expanded version of the input.
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats;
+
+ /// For nodes that are <1 x ty>, this map indicates the scalar value of type
+ /// 'ty' to use.
+ SmallDenseMap<TableId, TableId, 8> ScalarizedVectors;
+
+ /// For nodes that need to be split this map indicates which operands are the
+ /// expanded version of the input.
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> SplitVectors;
+
+ /// For vector nodes that need to be widened, indicates the widened value to
+ /// use.
+ SmallDenseMap<TableId, TableId, 8> WidenedVectors;
+
+ /// For values that have been replaced with another, indicates the replacement
+ /// value to use.
+ SmallDenseMap<TableId, TableId, 8> ReplacedValues;
+
+ /// This defines a worklist of nodes to process. In order to be pushed onto
+ /// this worklist, all operands of a node must have already been processed.
+ SmallVector<SDNode*, 128> Worklist;
+
+ TableId getTableId(SDValue V) {
+ assert(V.getNode() && "Getting TableId on SDValue()");
+
+ auto I = ValueToIdMap.find(V);
+ if (I != ValueToIdMap.end()) {
+      // Remap the id in case the value has since been replaced.
+ RemapId(I->second);
+ assert(I->second && "All Ids should be nonzero");
+ return I->second;
+ }
+ // Add if it's not there.
+ ValueToIdMap.insert(std::make_pair(V, NextValueId));
+ IdToValueMap.insert(std::make_pair(NextValueId, V));
+ ++NextValueId;
+ assert(NextValueId != 0 &&
+ "Ran out of Ids. Increase id type size or add compactification");
+ return NextValueId - 1;
+ }
+
+ const SDValue &getSDValue(TableId &Id) {
+ RemapId(Id);
+ assert(Id && "TableId should be non-zero");
+ return IdToValueMap[Id];
+ }
+
+public:
+ explicit DAGTypeLegalizer(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
+ "Too many value types for ValueTypeActions to hold!");
+ }
+
+ /// This is the main entry point for the type legalizer. This does a
+ /// top-down traversal of the dag, legalizing types as it goes. Returns
+ /// "true" if it made any changes.
+ bool run();
+
+ void NoteDeletion(SDNode *Old, SDNode *New) {
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+ TableId NewId = getTableId(SDValue(New, i));
+ TableId OldId = getTableId(SDValue(Old, i));
+
+ if (OldId != NewId)
+ ReplacedValues[OldId] = NewId;
+
+ // Delete Node from tables.
+ ValueToIdMap.erase(SDValue(Old, i));
+ IdToValueMap.erase(OldId);
+ PromotedIntegers.erase(OldId);
+ ExpandedIntegers.erase(OldId);
+ SoftenedFloats.erase(OldId);
+ PromotedFloats.erase(OldId);
+ ExpandedFloats.erase(OldId);
+ ScalarizedVectors.erase(OldId);
+ SplitVectors.erase(OldId);
+ WidenedVectors.erase(OldId);
+ }
+ }
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+private:
+ SDNode *AnalyzeNewNode(SDNode *N);
+ void AnalyzeNewValue(SDValue &Val);
+ void PerformExpensiveChecks();
+ void RemapId(TableId &Id);
+ void RemapValue(SDValue &V);
+
+ // Common routines.
+ SDValue BitConvertToInteger(SDValue Op);
+ SDValue BitConvertVectorToIntegerVector(SDValue Op);
+ SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+ bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+ bool CustomWidenLowerNode(SDNode *N, EVT VT);
+
+ /// Replace each result of the given MERGE_VALUES node with the corresponding
+ /// input operand, except for the result 'ResNo', for which the corresponding
+ /// input operand is returned.
+ SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
+
+ SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+ SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
+
+ void ReplaceValueWith(SDValue From, SDValue To);
+ void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi);
+
+ void AddToWorklist(SDNode *N) {
+ N->setNodeId(ReadyToProcess);
+ Worklist.push_back(N);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Integer Promotion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed operand Op which was promoted to a larger integer type,
+ /// this returns the promoted value. The low bits of the promoted value
+ /// corresponding to the original type are exactly equal to Op.
+ /// The extra bits contain rubbish, so the promoted value may need to be zero-
+ /// or sign-extended from the original type before it is usable (the helpers
+ /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+ /// For example, if Op is an i16 and was promoted to an i32, then this method
+ /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+ /// 16 bits of which contain rubbish.
+ SDValue GetPromotedInteger(SDValue Op) {
+ TableId &PromotedId = PromotedIntegers[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedInteger(SDValue Op, SDValue Result);
+
+ /// Get a promoted operand and sign extend it to the final size.
+ SDValue SExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ Op = GetPromotedInteger(Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ }
+
+ /// Get a promoted operand and zero extend it to the final size.
+ SDValue ZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ Op = GetPromotedInteger(Op);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
+ }
+
+ // Get a promoted operand and sign or zero extend it to the final size
+ // (depending on TargetLoweringInfo::isSExtCheaperThanZExt). For a given
+ // subtarget and type, the choice of sign or zero-extension will be
+ // consistent.
+ SDValue SExtOrZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc DL(Op);
+ Op = GetPromotedInteger(Op);
+ if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ return DAG.getZeroExtendInReg(Op, DL, OldVT.getScalarType());
+ }
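+
+  // Illustrative note (assumption): on a RISC-V-style target where
+  // isSExtCheaperThanZExt(i32, i64) is true, this helper sign-extends, and it
+  // makes the same choice at every use so the promoted bits stay consistent.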
+
+ // Integer Result Promotion.
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_AssertSext(SDNode *N);
+ SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+ SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
+ SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
+ SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BITREVERSE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntRes_Constant(SDNode *N);
+ SDValue PromoteIntRes_CTLZ(SDNode *N);
+ SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+ SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
+ SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
+ SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_VSELECT(SDNode *N);
+ SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+ SDValue PromoteIntRes_SETCC(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+ SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UNDEF(SDNode *N);
+ SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
+ SDValue PromoteIntRes_MULFIX(SDNode *N);
+ SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
+ SDValue PromoteIntRes_VECREDUCE(SDNode *N);
+ SDValue PromoteIntRes_ABS(SDNode *N);
+
+ // Integer Operand Promotion.
+ bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
+ SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
+ SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MULFIX(SDNode *N);
+ SDValue PromoteIntOp_FPOWI(SDNode *N);
+ SDValue PromoteIntOp_VECREDUCE(SDNode *N);
+
+ void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Expansion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed operand Op which was expanded into two integers of half
+ /// the size, this returns the two halves. The low bits of Op are exactly
+ /// equal to the bits of Lo; the high bits exactly equal Hi.
+ /// For example, if Op is an i64 which was expanded into two i32's, then this
+ /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+ /// Op, and Hi being equal to the upper 32 bits.
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Integer Result Expansion.
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LLROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
+ SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Integer Operand Expansion.
+ bool ExpandIntegerOperand(SDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_BR_CC(SDNode *N);
+ SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+ SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
+ SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+ SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+ SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
+
+ void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, const SDLoc &dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+  /// Given a processed operand Op which was converted to an integer of the
+  /// same size, this returns the integer. The integer contains exactly the
+  /// same bits as Op - only the type changed. For example, if Op is an f32
+  /// which was softened to an i32, then this method returns an i32, the bits
+  /// of which coincide with those of Op.
+ SDValue GetSoftenedFloat(SDValue Op) {
+ TableId Id = getTableId(Op);
+ auto Iter = SoftenedFloats.find(Id);
+ if (Iter == SoftenedFloats.end()) {
+ assert(isSimpleLegalType(Op.getValueType()) &&
+ "Operand wasn't converted to integer?");
+ return Op;
+ }
+ SDValue SoftenedOp = getSDValue(Iter->second);
+ assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?");
+ return SoftenedOp;
+ }
+ void SetSoftenedFloat(SDValue Op, SDValue Result);
+
+ // Convert Float Results to Integer.
+ void SoftenFloatResult(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
+ SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(SDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FMINNUM(SDNode *N);
+ SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
+ SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCEIL(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FDIV(SDNode *N);
+ SDValue SoftenFloatRes_FEXP(SDNode *N);
+ SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+ SDValue SoftenFloatRes_FLOG(SDNode *N);
+ SDValue SoftenFloatRes_FLOG2(SDNode *N);
+ SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMA(SDNode *N);
+ SDValue SoftenFloatRes_FMUL(SDNode *N);
+ SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
+ SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatRes_FPOW(SDNode *N);
+ SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREM(SDNode *N);
+ SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FROUND(SDNode *N);
+ SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSQRT(SDNode *N);
+ SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_UNDEF(SDNode *N);
+ SDValue SoftenFloatRes_VAARG(SDNode *N);
+ SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+
+ // Convert Float Operand to Integer.
+ bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
+ SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftenFloatOp_LROUND(SDNode *N);
+ SDValue SoftenFloatOp_LLROUND(SDNode *N);
+ SDValue SoftenFloatOp_LRINT(SDNode *N);
+ SDValue SoftenFloatOp_LLRINT(SDNode *N);
+ SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatOp_SETCC(SDNode *N);
+ SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Float Expansion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed operand Op which was expanded into two floating-point
+ /// values of half the size, this returns the two halves.
+ /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+ /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
+ /// into two f64's, then this method returns the two f64's, with Lo being
+ /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Float Result Expansion.
+ void ExpandFloatResult(SDNode *N, unsigned ResNo);
+ void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Float Operand Expansion.
+ bool ExpandFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
+ SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_LROUND(SDNode *N);
+ SDValue ExpandFloatOp_LLROUND(SDNode *N);
+ SDValue ExpandFloatOp_LRINT(SDNode *N);
+ SDValue ExpandFloatOp_LLRINT(SDNode *N);
+ SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+ SDValue ExpandFloatOp_SETCC(SDNode *N);
+ SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, const SDLoc &dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float promotion support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ SDValue GetPromotedFloat(SDValue Op) {
+ TableId &PromotedId = PromotedFloats[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedFloat(SDValue Op, SDValue Result);
+
+ void PromoteFloatResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteFloatRes_BITCAST(SDNode *N);
+ SDValue PromoteFloatRes_BinOp(SDNode *N);
+ SDValue PromoteFloatRes_ConstantFP(SDNode *N);
+ SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue PromoteFloatRes_FMAD(SDNode *N);
+ SDValue PromoteFloatRes_FPOWI(SDNode *N);
+ SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
+ SDValue PromoteFloatRes_LOAD(SDNode *N);
+ SDValue PromoteFloatRes_SELECT(SDNode *N);
+ SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
+ SDValue PromoteFloatRes_UnaryOp(SDNode *N);
+ SDValue PromoteFloatRes_UNDEF(SDNode *N);
+ SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
+ SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
+
+ bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Scalarization Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed one-element vector Op which was scalarized to its
+ /// element type, this returns the element. For example, if Op is a v1i32,
+ /// Op = < i32 val >, this method returns val, an i32.
+ SDValue GetScalarizedVector(SDValue Op) {
+ TableId &ScalarizedId = ScalarizedVectors[getTableId(Op)];
+ SDValue ScalarizedOp = getSDValue(ScalarizedId);
+ assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+ return ScalarizedOp;
+ }
+ void SetScalarizedVector(SDValue Op, SDValue Result);
+
+ // Vector Result Scalarization: <1 x ty> -> ty.
+ void ScalarizeVectorResult(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_StrictFPOp(SDNode *N);
+ SDValue ScalarizeVecRes_OverflowOp(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_InregOp(SDNode *N);
+ SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
+
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_VSELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_SETCC(SDNode *N);
+ SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+ SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+
+ SDValue ScalarizeVecRes_MULFIX(SDNode *N);
+
+ // Vector Operand Scalarization: <1 x ty> -> ty.
+ bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+ SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N);
+ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_VSELECT(SDNode *N);
+ SDValue ScalarizeVecOp_VSETCC(SDNode *N);
+ SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Splitting Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed vector Op which was split into vectors of half the size,
+ /// this method returns the halves. The first elements of Op coincide with the
+ /// elements of Lo; the remaining elements of Op coincide with the elements of
+ /// Hi: Op is what you would get by concatenating Lo and Hi.
+ /// For example, if Op is a v8i32 that was split into two v4i32's, then this
+ /// method returns the two v4i32's, with Lo corresponding to the first 4
+ /// elements of Op, and Hi to the last 4 elements.
+ void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+ void SplitVectorResult(SDNode *N, unsigned ResNo);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ SDValue &Hi);
+ void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+ SDValue SplitVecOp_TruncateHelper(SDNode *N);
+
+ SDValue SplitVecOp_BITCAST(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_VSETCC(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
+ SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed vector Op which was widened into a larger vector, this
+ /// method returns the larger vector. The elements of the returned vector
+ /// consist of the elements of Op followed by elements containing rubbish.
+ /// For example, if Op is a v2i32 that was widened to a v4i32, then this
+ /// method returns a v4i32 for which the first two elements are the same as
+ /// those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ TableId &WidenedId = WidenedVectors[getTableId(Op)];
+ SDValue WidenedOp = getSDValue(WidenedId);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+ // Widen Vector Result Promotion.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
+ SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVSELECTAndMask(SDNode *N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+
+ SDValue WidenVecRes_Ternary(SDNode *N);
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
+ SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
+ SDValue WidenVecRes_StrictFP(SDNode *N);
+ SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
+ SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecRes_POWI(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+ SDValue WidenVecRes_InregOp(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTEND(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_SETCC(SDNode* N);
+ SDValue WidenVecOp_VSELECT(SDNode *N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+ SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecOp_VECREDUCE(SDNode *N);
+
+  /// Helper function to generate a set of operations to perform a vector
+  /// operation for a wider type.
+  SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Helper function to generate a set of loads to load a vector with a
+ /// resulting wider type. It takes:
+ /// LdChain: list of chains for the load to be generated.
+ /// Ld: load to widen
+ SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD);
+
+ /// Helper function to generate a set of extension loads to load a vector with
+ /// a resulting wider type. It takes:
+ /// LdChain: list of chains for the load to be generated.
+ /// Ld: load to widen
+ /// ExtType: extension element type
+ SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+  /// Helper function to generate a set of stores to store a widened vector
+  /// into non-widened memory.
+  /// StChain: list of chains for the stores we have generated
+  /// ST: store of a widened value
+  void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
+
+  /// Helper function to generate a set of truncating stores to store a widened
+  /// vector into non-widened memory.
+  /// StChain: list of chains for the stores we have generated
+  /// ST: store of a widened value
+  void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
+                                 StoreSDNode *ST);
+
+  /// Modifies a vector input (widens or narrows) to a vector of NVT. The
+  /// input vector must have the same element type as NVT.
+ /// When FillWithZeroes is "on" the vector will be widened with zeroes.
+ /// By default, the vector will be widened with undefined values.
+ SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
+
+ /// Return a mask of vector type MaskVT to replace InMask. Also adjust
+ /// MaskVT to ToMaskVT if needed with vector extension or truncation.
+ SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // not necessarily identical types. As such they can be used for splitting
+ // vectors and expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+ /// given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVSETCC(const SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // identical types of half the size, and that the Lo/Hi part is stored first
+ // in memory on little/big-endian machines, followed by the Hi/Lo part. As
+ // such they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// This function will split the integer \p Op into \p NumElements
+ /// operations of type \p EltVT and store them in \p Ops.
+ void IntegerToVector(SDValue Op, unsigned NumElements,
+ SmallVectorImpl<SDValue> &Ops, EVT EltVT);
+
+ // Generic Result Expansion.
+ void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Generic Operand Expansion.
+ SDValue ExpandOp_BITCAST (SDNode *N);
+ SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
+ SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
+ SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+ SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 0000000000000..5562f400b6e1d
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,561 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. The Lo/Hi part
+// is required to be stored first in memory on little/big-endian machines.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+// There are no requirements on how the type is represented in memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DataLayout.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part. This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
+void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetExpandedOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ SDLoc dl(N);
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ break;
+ case TargetLowering::TypePromoteFloat:
+    llvm_unreachable("Bitcast of a promotion-needing float should never need "
+                     "expansion");
+ case TargetLowering::TypeSoftenFloat:
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat: {
+ auto &DL = DAG.getDataLayout();
+ // Convert the expanded pieces of the input.
+ GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.hasBigEndianPartOrdering(InVT, DL) !=
+ TLI.hasBigEndianPartOrdering(OutVT, DL))
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ case TargetLowering::TypeSplitVector:
+ GetSplitVector(InOp, Lo, Hi);
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element instead.
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeWidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
+ InOp = GetWidenedVector(InOp);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
+ std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ }
+
+ if (InVT.isVector() && OutVT.isInteger()) {
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
+ // is legal but the result is not.
+ unsigned NumElems = 2;
+ EVT ElemVT = NOutVT;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+ // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+ while (!isTypeLegal(NVT)) {
+ unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+ // If the element size is smaller than byte, bail.
+ if (NewSizeInBits < 8)
+ break;
+ NumElems *= 2;
+ ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits);
+ NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+ }
+
+ if (isTypeLegal(NVT)) {
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
+
+ SmallVector<SDValue, 8> Vals;
+ for (unsigned i = 0; i < NumElems; ++i)
+ Vals.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+
+ // Build Lo, Hi pair by pairing extracted elements if needed.
+ unsigned Slot = 0;
+ for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
+ // Each iteration will BUILD_PAIR two nodes and append the result until
+ // there are only two nodes left, i.e. Lo and Hi.
+ SDValue LHS = Vals[Slot];
+ SDValue RHS = Vals[Slot + 1];
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LHS, RHS);
+
+ Vals.push_back(DAG.getNode(
+ ISD::BUILD_PAIR, dl,
+ EVT::getIntegerVT(*DAG.getContext(), LHS.getValueSizeInBits() << 1),
+ LHS, RHS));
+ }
+ Lo = Vals[Slot++];
+ Hi = Vals[Slot++];
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ return;
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack.
+ assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and expanded destination types.
+ unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(
+ NOutVT.getTypeForEVT(*DAG.getContext()));
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo);
+
+ // Load the first half from the stack slot.
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(IncrementSize, dl,
+ StackPtr.getValueType()));
+
+ // Load the second half from the stack slot.
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize),
+ MinAlign(Alignment, IncrementSize));
+
+ // Handle endianness of the load.
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+}
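+
+// Illustrative note (assumption): the stack fallback above is the fully
+// general path; e.g. expanding i128 = BITCAST v4f32 when no suitable integer
+// vector type is legal spills the vector and reloads the two i64 halves from
+// offsets 0 and 8, with the endianness swap applied afterwards.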
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+ Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ EVT OldEltVT = OldVec.getValueType().getVectorElementType();
+ SDLoc dl(N);
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ EVT OldVT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+ if (OldVT != OldEltVT) {
+ // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+ // the input vector. If so, extend the elements of the input vector to the
+ // same bitwidth as the result before expanding.
+    assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, 2*OldElts),
+ OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, dl, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+}
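+
+// Worked example (illustrative): extracting element 1 of a <2 x i64> on a
+// 32-bit target bitcasts the source to <4 x i32> and extracts elements
+// 2*1 = 2 and 2*1+1 = 3 as the Lo/Hi halves, swapped on big-endian.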
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+ SDLoc dl(N);
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT ValueVT = LD->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ unsigned Alignment = LD->getAlignment();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment,
+ LD->getMemOperand()->getFlags(), AAInfo);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ MinAlign(Alignment, IncrementSize),
+ LD->getMemOperand()->getFlags(), AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Handle endianness of the load.
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ SDLoc dl(N);
+ const unsigned Align = N->getConstantOperandVal(3);
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
+ Chain = Hi.getValue(1);
+
+ // Handle endianness of the load.
+ if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
+ SmallVectorImpl<SDValue> &Ops,
+ EVT EltVT) {
+ assert(Op.getValueType().isInteger());
+ SDLoc DL(Op);
+ SDValue Parts[2];
+
+ if (NumElements > 1) {
+ NumElements >>= 1;
+ SplitInteger(Op, Parts[0], Parts[1]);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+ IntegerToVector(Parts[0], NumElements, Ops, EltVT);
+ IntegerToVector(Parts[1], NumElements, Ops, EltVT);
+ } else {
+ Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op));
+ }
+}
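+
+// For instance (illustrative): converting an i64 operand into four i16-sized
+// elements recurses twice, i64 -> 2 x i32 -> 4 x i16, and each leaf value is
+// bitcast to EltVT and appended to Ops in memory order.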
+
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
+ SDLoc dl(N);
+ if (N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isInteger()) {
+ // An illegal expanding type is being converted to a legal vector type.
+ // Make a two element vector out of the expanded parts and convert that
+ // instead, but only if the new vector type is legal (otherwise there
+ // is no point, and it might create expansion loops). For example, on
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
+ //
+ // FIXME: I'm not sure why we are first trying to split the input into
+ // a 2 element vector, so I'm leaving it here to maintain the current
+ // behavior.
+ unsigned NumElts = 2;
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+ NumElts);
+ if (!isTypeLegal(NVT)) {
+ // If we can't find a legal type by splitting the integer in half,
+ // then we can use the node's value type.
+ NumElts = N->getValueType(0).getVectorNumElements();
+ NVT = N->getValueType(0);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
+
+ SDValue Vec =
+ DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts));
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
+ }
+
+ // Otherwise, store to a temporary and load out again as the new type.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ SDLoc dl(N);
+
+ assert(OldVT == VecVT.getVectorElementType() &&
+ "BUILD_VECTOR operand type doesn't match vector element type!");
+
+ // Build a vector of twice the length out of the expanded elements.
+ // For example <3 x i64> -> <6 x i32>.
+ SmallVector<SDValue, 16> NewElts;
+ NewElts.reserve(NumElts*2);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(i), Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ NewElts.push_back(Lo);
+ NewElts.push_back(Hi);
+ }
+
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size());
+ SDValue NewVec = DAG.getBuildVector(NewVecVT, dl, NewElts);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ SDLoc dl(N);
+
+ SDValue Val = N->getOperand(1);
+ EVT OldEVT = Val.getValueType();
+ EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
+
+ assert(OldEVT == VecVT.getVectorElementType() &&
+ "Inserted element type doesn't match vector element type!");
+
+ // Bitconvert to a vector of twice the length with elements of the expanded
+ // type, insert the expanded vector elements, and then convert back.
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ NewVecVT, N->getOperand(0));
+
+ SDValue Lo, Hi;
+ GetExpandedOp(Val, Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue Idx = N->getOperand(2);
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl,
+ Idx.getValueType(), Idx,
+ DAG.getConstant(1, dl, Idx.getValueType()));
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
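+
+// Worked example (illustrative, little-endian): inserting an expanded i64
+// into v2i64:V at index Idx operates on the bitcast v4i32 vector:
+//   NV = INSERT_VECTOR_ELT (BITCAST V), Lo, (ADD Idx, Idx)
+//   NV = INSERT_VECTOR_ELT NV, Hi, (ADD (ADD Idx, Idx), 1)
+// and the result is NV bitcast back to v2i64.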
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+ "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ Ops[0] = N->getOperand(0);
+ SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+ for (unsigned i = 1; i < NumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getBuildVector(VT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+ assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ SDLoc dl(N);
+
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT ValueVT = St->getValue().getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
+ SDValue Chain = St->getChain();
+ SDValue Ptr = St->getBasePtr();
+ unsigned Alignment = St->getAlignment();
+ AAMDNodes AAInfo = St->getAAInfo();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(St->getValue(), Lo, Hi);
+
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment,
+ St->getMemOperand()->getFlags(), AAInfo);
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ St->getPointerInfo().getWithOffset(IncrementSize),
+ MinAlign(Alignment, IncrementSize),
+ St->getMemOperand()->getFlags(), AAInfo);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
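+
+// Sketch of the expansion above (assuming i64 expands to two i32 parts):
+// storing an i64 value becomes two i32 stores joined by a TokenFactor,
+//   ST i32 Lo, Ptr
+//   ST i32 Hi, Ptr+4
+// with Lo and Hi swapped when the target uses big-endian part ordering.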
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetSplitOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue LL, LH, RL, RH, CL, CH;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(1), LL, LH);
+ GetSplitOp(N->getOperand(2), RL, RH);
+
+ SDValue Cond = N->getOperand(0);
+ CL = CH = Cond;
+ if (Cond.getValueType().isVector()) {
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
+    // Check if split versions of the vector are already available and
+    // use those instead of splitting the mask operand again.
+ else if (getTypeAction(Cond.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(Cond, CL, CH);
+ // It seems to improve code to generate two narrow SETCCs as opposed to
+ // splitting a wide result vector.
+ else if (Cond.getOpcode() == ISD::SETCC) {
+ // If the condition is a vXi1 vector, and the LHS of the setcc is a legal
+ // type and the setcc result type is the same vXi1, then leave the setcc
+ // alone.
+ EVT CondLHSVT = Cond.getOperand(0).getValueType();
+ if (Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ isTypeLegal(CondLHSVT) &&
+ getSetCCResultType(CondLHSVT) == Cond.getValueType())
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ else
+ SplitVecRes_SETCC(Cond.getNode(), CL, CH);
+ } else
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(2), LL, LH);
+ GetSplitOp(N->getOperand(3), RL, RH);
+
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+ N->getOperand(1), LL, RL, N->getOperand(4));
+ Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+ N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getUNDEF(LoVT);
+ Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 0000000000000..15c3a0b6cfadf
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,1444 @@
+//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types. For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
+// or operations that happen to take a vector which are custom-lowered;
+// the legalization for such operations never produces nodes
+// with illegal types, so it's okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "legalizevectorops"
+
+namespace {
+
+class VectorLegalizer {
+ SelectionDAG& DAG;
+ const TargetLowering &TLI;
+ bool Changed = false; // Keep track of whether anything changed
+
+ /// For nodes that are of legal width, and that have more than one use, this
+ /// map indicates what regularized operand to use. This allows us to avoid
+ /// legalizing the same thing more than once.
+ SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
+
+ /// Adds a node to the translation cache.
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return it unchanged.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+ /// Legalizes the given node.
+ SDValue LegalizeOp(SDValue Op);
+
+ /// Assuming the node is legal, "legalize" the results.
+ SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+
+ /// Implements unrolling a VSETCC.
+ SDValue UnrollVSETCC(SDValue Op);
+
+ /// Implement expand-based legalization of vector operations.
+ ///
+ /// This is just a high-level routine to dispatch to specific code paths for
+ /// operations to legalize them.
+ SDValue Expand(SDValue Op);
+
+ /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
+ /// FP_TO_SINT isn't legal.
+ SDValue ExpandFP_TO_UINT(SDValue Op);
+
+ /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+  /// SINT_TO_FLOAT and SHR on vectors aren't legal.
+ SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+
+ /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+ SDValue ExpandSEXTINREG(SDValue Op);
+
+ /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and bitcasts to the proper
+ /// type. The contents of the bits in the extended part of each element are
+ /// undef.
+ SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place, bitcasts to the proper
+ /// type, then shifts left and arithmetic shifts right to introduce a sign
+ /// extension.
+ SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and blends zeros into
+ /// the remaining lanes, finally bitcasting to the proper type.
+ SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// Implement expand-based legalization of ABS vector operations.
+  /// If the following expansion is legal/custom then do it:
+ /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1))
+ /// else unroll the operation.
+ SDValue ExpandABS(SDValue Op);
+
+ /// Expand bswap of vectors into a shuffle if legal.
+ SDValue ExpandBSWAP(SDValue Op);
+
+ /// Implement vselect in terms of XOR, AND, OR when blend is not
+ /// supported by the target.
+ SDValue ExpandVSELECT(SDValue Op);
+ SDValue ExpandSELECT(SDValue Op);
+ SDValue ExpandLoad(SDValue Op);
+ SDValue ExpandStore(SDValue Op);
+ SDValue ExpandFNEG(SDValue Op);
+ SDValue ExpandFSUB(SDValue Op);
+ SDValue ExpandBITREVERSE(SDValue Op);
+ SDValue ExpandCTPOP(SDValue Op);
+ SDValue ExpandCTLZ(SDValue Op);
+ SDValue ExpandCTTZ(SDValue Op);
+ SDValue ExpandFunnelShift(SDValue Op);
+ SDValue ExpandROT(SDValue Op);
+ SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
+ SDValue ExpandUADDSUBO(SDValue Op);
+ SDValue ExpandSADDSUBO(SDValue Op);
+ SDValue ExpandMULO(SDValue Op);
+ SDValue ExpandAddSubSat(SDValue Op);
+ SDValue ExpandFixedPointMul(SDValue Op);
+ SDValue ExpandStrictFPOp(SDValue Op);
+
+ /// Implements vector promotion.
+ ///
+ /// This is essentially just bitcasting the operands to a different type and
+ /// bitcasting the result back to the original type.
+ SDValue Promote(SDValue Op);
+
+ /// Implements [SU]INT_TO_FP vector promotion.
+ ///
+ /// This is a [zs]ext of the input operand to a larger integer type.
+ SDValue PromoteINT_TO_FP(SDValue Op);
+
+ /// Implements FP_TO_[SU]INT vector promotion of the result type.
+ ///
+ /// It is promoted to a larger integer type. The result is then
+ /// truncated back to the original type.
+ SDValue PromoteFP_TO_INT(SDValue Op);
+
+public:
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
+
+  /// Begin legalizing the vector operations in the DAG.
+ bool Run();
+};
+
+} // end anonymous namespace
+
+bool VectorLegalizer::Run() {
+ // Before we start legalizing vector nodes, check if there are any vectors.
+ bool HasVectors = false;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
+ // Check if the values of the nodes contain vectors. We don't need to check
+ // the operands because we are going to check their values at some point.
+ for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
+ J != E; ++J)
+ HasVectors |= J->isVector();
+
+ // If we found a vector node we can start the legalization.
+ if (HasVectors)
+ break;
+ }
+
+ // If this basic block has no vectors then no need to legalize vectors.
+ if (!HasVectors)
+ return false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
+ LegalizeOp(SDValue(&*I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+
+ return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+ // Generic legalization: just pass the operand through.
+ for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+ return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDNode* Node = Op.getNode();
+
+ // Legalize the operands
+ SmallVector<SDValue, 8> Ops;
+ for (const SDValue &Op : Node->op_values())
+ Ops.push_back(LegalizeOp(Op));
+
+ SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
+ Op.getResNo());
+
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+ LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
+ Node->dump(&DAG));
+ switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
+ LD->getMemoryVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ return TranslateLegalizeResults(Op, Result);
+ case TargetLowering::Custom:
+ if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
+ assert(Lowered->getNumValues() == Op->getNumValues() &&
+ "Unexpected number of results");
+ if (Lowered != Result) {
+ // Make sure the new code is also legal.
+ Lowered = LegalizeOp(Lowered);
+ Changed = true;
+ }
+ return TranslateLegalizeResults(Op, Lowered);
+ }
+ LLVM_FALLTHROUGH;
+ case TargetLowering::Expand:
+ Changed = true;
+ return ExpandLoad(Op);
+ }
+ }
+ } else if (Op.getOpcode() == ISD::STORE) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT StVT = ST->getMemoryVT();
+ MVT ValVT = ST->getValue().getSimpleValueType();
+ if (StVT.isVector() && ST->isTruncatingStore()) {
+ LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
+ Node->dump(&DAG));
+ switch (TLI.getTruncStoreAction(ValVT, StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ return TranslateLegalizeResults(Op, Result);
+ case TargetLowering::Custom: {
+ SDValue Lowered = TLI.LowerOperation(Result, DAG);
+ if (Lowered != Result) {
+ // Make sure the new code is also legal.
+ Lowered = LegalizeOp(Lowered);
+ Changed = true;
+ }
+ return TranslateLegalizeResults(Op, Lowered);
+ }
+ case TargetLowering::Expand:
+ Changed = true;
+ return ExpandStore(Op);
+ }
+ }
+ }
+
+ bool HasVectorValueOrOp = false;
+ for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J)
+ HasVectorValueOrOp |= J->isVector();
+ for (const SDValue &Op : Node->op_values())
+ HasVectorValueOrOp |= Op.getValueType().isVector();
+
+ if (!HasVectorValueOrOp)
+ return TranslateLegalizeResults(Op, Result);
+
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ switch (Op.getOpcode()) {
+ default:
+ return TranslateLegalizeResults(Op, Result);
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_EXTEND:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ // If we're asked to expand a strict vector floating-point operation,
+ // by default we're going to simply unroll it. That is usually the
+ // best approach, except in the case where the resulting strict (scalar)
+ // operations would themselves use the fallback mutation to non-strict.
+ // In that specific case, just do the fallback on the vector op.
+ if (Action == TargetLowering::Expand &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal) {
+ EVT EltVT = Node->getValueType(0).getVectorElementType();
+ if (TLI.getOperationAction(Node->getOpcode(), EltVT)
+ == TargetLowering::Expand &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
+ == TargetLowering::Legal)
+ Action = TargetLowering::Legal;
+ }
+ break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::FSHL:
+ case ISD::FSHR:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::ABS:
+ case ISD::BSWAP:
+ case ISD::BITREVERSE:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ case ISD::FCOPYSIGN:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FFLOOR:
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ case ISD::FMA:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ case ISD::FCANONICALIZE:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: {
+ unsigned Scale = Node->getConstantOperandVal(2);
+ Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
+ Node->getValueType(0), Scale);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
+
+ switch (Action) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Promote:
+ Result = Promote(Op);
+ Changed = true;
+ break;
+ case TargetLowering::Legal:
+ LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
+ break;
+ case TargetLowering::Custom: {
+ LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
+ if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
+ Result = Tmp1;
+ break;
+ }
+ LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
+ LLVM_FALLTHROUGH;
+ }
+ case TargetLowering::Expand:
+ Result = Expand(Op);
+ }
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op) {
+ Result = LegalizeOp(Result);
+ Changed = true;
+ }
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue VectorLegalizer::Promote(SDValue Op) {
+ // For a few operations there is a specific concept for promotion based on
+ // the operand's type.
+ switch (Op.getOpcode()) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // "Promote" the operation by extending the operand.
+ return PromoteINT_TO_FP(Op);
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ // Promote the operation by extending the operand.
+ return PromoteFP_TO_INT(Op);
+ }
+
+ // There are currently two cases of vector promotion:
+  // 1) Bitcasting a vector of integers to a different type with the same
+  //    overall bit width. For example, x86 promotes ISD::AND v2i32 to v1i64.
+ // 2) Extending a vector of floats to a vector of the same number of larger
+ // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
+ MVT VT = Op.getSimpleValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ SDLoc dl(Op);
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ if (Op.getOperand(j)
+ .getValueType()
+ .getVectorElementType()
+ .isFloatingPoint() &&
+ NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
+ Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
+ if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
+ (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
+ NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
+ else
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+}
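+
+// A sketch of the two promotion paths, using the examples from the comment
+// above (illustrative):
+//   AND  v2i32 promoted to v1i64: BITCAST the operands, AND v1i64, BITCAST
+//   the result back to v2i32.
+//   FADD v4f16 promoted to v4f32: FP_EXTEND the operands, FADD v4f32,
+//   FP_ROUND the result back to v4f16.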
+
+SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
+ // INT_TO_FP operations may require the input operand be promoted even
+ // when the type is otherwise legal.
+ MVT VT = Op.getOperand(0).getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ "Vectors have different number of elements!");
+
+ SDLoc dl(Op);
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
+}
+
+// For FP_TO_INT we promote the result type to a vector type with wider
+// elements and then truncate the result. This is different from the default
+// PromoteVector, which uses a bitcast to promote and thus assumes that the
+// promoted vector type has the same overall size.
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
+ MVT VT = Op.getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ "Vectors have different number of elements!");
+
+ unsigned NewOpc = Op->getOpcode();
+ // Change FP_TO_UINT to FP_TO_SINT if possible.
+ // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
+ if (NewOpc == ISD::FP_TO_UINT &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ SDLoc dl(Op);
+ SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+  // (e.g. because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, NVT, Promoted,
+ DAG.getValueType(VT.getScalarType()));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
+}
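+
+// For example (illustrative): promoting FP_TO_SINT v4f32 -> v4i8 with
+// NVT = v4i32 produces
+//   P = v4i32 FP_TO_SINT X
+//   P = AssertSext P, i8
+//   result = v4i8 TRUNCATE P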
+
+SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+
+ EVT SrcVT = LD->getMemoryVT();
+ EVT SrcEltVT = SrcVT.getScalarType();
+ unsigned NumElem = SrcVT.getVectorNumElements();
+
+ SDValue NewChain;
+ SDValue Value;
+ if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+ SDLoc dl(Op);
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+
+ EVT DstEltVT = LD->getValueType(0).getScalarType();
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+    // When the elements in a vector are not byte-addressable, we cannot
+    // directly load each element by advancing a pointer, which can only
+    // address bytes. Instead, we load all the significant words, mask bits
+    // off, and concatenate them to form each element. Finally, the elements
+    // are extended to the destination scalar type to build the destination
+    // vector.
+ EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
+
+    assert(WideVT.isRound() &&
+           "Could not handle the sophisticated case when the widest integer is"
+           " not a power of 2.");
+ assert(WideVT.bitsGE(SrcEltVT) &&
+ "Type is not legalized?");
+
+ unsigned WideBytes = WideVT.getStoreSize();
+ unsigned Offset = 0;
+ unsigned RemainingBytes = SrcVT.getStoreSize();
+ SmallVector<SDValue, 8> LoadVals;
+ while (RemainingBytes > 0) {
+ SDValue ScalarLoad;
+ unsigned LoadBytes = WideBytes;
+
+ if (RemainingBytes >= LoadBytes) {
+ ScalarLoad =
+ DAG.getLoad(WideVT, dl, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ } else {
+ EVT LoadVT = WideVT;
+ while (RemainingBytes < LoadBytes) {
+ LoadBytes >>= 1; // Reduce the load size by half.
+ LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
+ }
+ ScalarLoad =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset), LoadVT,
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ }
+
+ RemainingBytes -= LoadBytes;
+ Offset += LoadBytes;
+
+ BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ unsigned BitOffset = 0;
+ unsigned WideIdx = 0;
+ unsigned WideBits = WideVT.getSizeInBits();
+
+ // Extract bits, pack and extend/trunc them into destination type.
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant(
+ APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT);
+
+ for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
+ assert(BitOffset < WideBits && "Unexpected offset!");
+
+ SDValue ShAmt = DAG.getConstant(
+ BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+
+ BitOffset += SrcEltBits;
+ if (BitOffset >= WideBits) {
+ WideIdx++;
+ BitOffset -= WideBits;
+ if (BitOffset > 0) {
+ ShAmt = DAG.getConstant(
+ SrcEltBits - BitOffset, dl,
+ TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ SDValue Hi =
+ DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
+ }
+ }
+
+ Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
+
+ switch (ExtType) {
+ default: llvm_unreachable("Unknown extended-load op!");
+ case ISD::EXTLOAD:
+ Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::ZEXTLOAD:
+ Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::SEXTLOAD:
+ ShAmt =
+ DAG.getConstant(WideBits - SrcEltBits, dl,
+ TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ }
+ Vals.push_back(Lo);
+ }
+
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
+ } else {
+ SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
+    // Skip past the MERGE_VALUES node, if present.
+ if (Scalarized->getOpcode() == ISD::MERGE_VALUES) {
+ NewChain = Scalarized.getOperand(1);
+ Value = Scalarized.getOperand(0);
+ } else {
+ NewChain = Scalarized.getValue(1);
+ Value = Scalarized.getValue(0);
+ }
+ }
+
+ AddLegalizedOperand(Op.getValue(0), Value);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return (Op.getResNo() ? NewChain : Value);
+}
+
+SDValue VectorLegalizer::ExpandStore(SDValue Op) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
+ AddLegalizedOperand(Op, TF);
+ return TF;
+}
+
+SDValue VectorLegalizer::Expand(SDValue Op) {
+ switch (Op->getOpcode()) {
+ case ISD::SIGN_EXTEND_INREG:
+ return ExpandSEXTINREG(Op);
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return ExpandANY_EXTEND_VECTOR_INREG(Op);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return ExpandZERO_EXTEND_VECTOR_INREG(Op);
+ case ISD::BSWAP:
+ return ExpandBSWAP(Op);
+ case ISD::VSELECT:
+ return ExpandVSELECT(Op);
+ case ISD::SELECT:
+ return ExpandSELECT(Op);
+ case ISD::FP_TO_UINT:
+ return ExpandFP_TO_UINT(Op);
+ case ISD::UINT_TO_FP:
+ return ExpandUINT_TO_FLOAT(Op);
+ case ISD::FNEG:
+ return ExpandFNEG(Op);
+ case ISD::FSUB:
+ return ExpandFSUB(Op);
+ case ISD::SETCC:
+ return UnrollVSETCC(Op);
+ case ISD::ABS:
+ return ExpandABS(Op);
+ case ISD::BITREVERSE:
+ return ExpandBITREVERSE(Op);
+ case ISD::CTPOP:
+ return ExpandCTPOP(Op);
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return ExpandCTLZ(Op);
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return ExpandCTTZ(Op);
+ case ISD::FSHL:
+ case ISD::FSHR:
+ return ExpandFunnelShift(Op);
+ case ISD::ROTL:
+ case ISD::ROTR:
+ return ExpandROT(Op);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ return ExpandFMINNUM_FMAXNUM(Op);
+ case ISD::UADDO:
+ case ISD::USUBO:
+ return ExpandUADDSUBO(Op);
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ return ExpandSADDSUBO(Op);
+ case ISD::UMULO:
+ case ISD::SMULO:
+ return ExpandMULO(Op);
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ return ExpandAddSubSat(Op);
+ case ISD::SMULFIX:
+ case ISD::UMULFIX:
+ return ExpandFixedPointMul(Op);
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIXSAT:
+    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet; it is not clear
+    // exactly why. Maybe it results in worse codegen than the unroll for some
+    // targets? This should probably be investigated, and if we still prefer
+    // to unroll, an explanation would be helpful.
+ return DAG.UnrollVectorOp(Op.getNode());
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ return ExpandStrictFPOp(Op);
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ return TLI.expandVecReduce(Op.getNode(), DAG);
+ default:
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+}
+
+SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+ // Lower a select instruction where the condition is a scalar and the
+ // operands are vectors. Lower this select to VSELECT and implement it
+  // using XOR, AND, OR. The selector bit is broadcast.
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ assert(VT.isVector() && !Mask.getValueType().isVector()
+ && Op1.getValueType() == Op2.getValueType() && "Invalid type");
+
+ // If we can't even use the basic vector operations of
+  // AND, OR, XOR, we will have to scalarize the op.
+  // Note that the operation may be 'promoted', which means that it is
+  // 'bitcast' to another type which is handled.
+ // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Generate a mask operand.
+ EVT MaskTy = VT.changeVectorElementTypeToInteger();
+
+  // Determine the size of each element in the vector mask.
+ EVT BitTy = MaskTy.getScalarType();
+
+ Mask = DAG.getSelect(DL, BitTy, Mask,
+ DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
+ BitTy),
+ DAG.getConstant(0, DL, BitTy));
+
+  // Broadcast the mask so that the entire vector is all ones or all zeros.
+ Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
+
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
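+
+// Sketch of the lowering above (illustrative): select i1 %c, v4f32 %a, v4f32 %b
+// becomes
+//   M = v4i32 splat(%c ? -1 : 0)
+//   R = ((BITCAST %a) & M) | ((BITCAST %b) & ~M)
+// followed by a bitcast of R back to v4f32.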
+
+SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Make sure that the SRA and SHL instructions are available.
+ if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ SDLoc DL(Op);
+ EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+
+ unsigned BW = VT.getScalarSizeInBits();
+ unsigned OrigBW = OrigTy.getScalarSizeInBits();
+ SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
+
+ Op = Op.getOperand(0);
+ Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
+ return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
+}
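+
+// For example (sketch): a v4i32 SIGN_EXTEND_INREG from i8 becomes
+//   T = SHL v4i32 X, splat(24)
+//   R = SRA v4i32 T, splat(24)
+// replicating bit 7 of each element into bits 8-31.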
+
+// Generically expand a vector anyext in register to a shuffle of the relevant
+// lanes into the appropriate locations, with other lanes left undef.
+SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+ // into a larger vector type.
+ if (SrcVT.bitsLE(VT)) {
+ assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+ "ANY_EXTEND_VECTOR_INREG vector size mismatch");
+ NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NumSrcElements);
+ Src = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ // Build a base mask of undef shuffles.
+ SmallVector<int, 16> ShuffleMask;
+ ShuffleMask.resize(NumSrcElements, -1);
+
+ // Place the extended lanes into the correct locations.
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
+
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
+}
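+
+// For example (illustrative): any-extending v4i16 in register to a v2i32
+// result uses ExtLaneScale = 2 and the shuffle mask {0, -1, 1, -1} on
+// little-endian ({-1, 0, -1, 1} on big-endian), then bitcasts the shuffled
+// v4i16 to v2i32; the lanes left at -1 stay undef.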
+
+SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // First build an any-extend node which can be legalized above when we
+ // recurse through it.
+ Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
+
+  // Now we need a sign extend. Do this by shifting the elements. Even if these
+ // aren't legal operations, they have a better chance of being legalized
+ // without full scalarization than the sign extension does.
+ unsigned EltWidth = VT.getScalarSizeInBits();
+ unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
+ SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
+ return DAG.getNode(ISD::SRA, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
+ ShiftAmount);
+}
+
+// Generically expand a vector zext in register to a shuffle of the relevant
+// lanes into the appropriate locations, a blend of zero into the high bits,
+// and a bitcast to the wider element type.
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+ // into a larger vector type.
+ if (SrcVT.bitsLE(VT)) {
+ assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+ "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
+ NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NumSrcElements);
+ Src = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ // Build up a zero vector to blend into this one.
+ SDValue Zero = DAG.getConstant(0, DL, SrcVT);
+
+ // Shuffle the incoming lanes into the correct position, and pull all other
+ // lanes from the zero vector.
+ SmallVector<int, 16> ShuffleMask;
+ ShuffleMask.reserve(NumSrcElements);
+ for (int i = 0; i < NumSrcElements; ++i)
+ ShuffleMask.push_back(i);
+
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
+}
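+
+// Continuing the v4i16 -> v2i32 example (illustrative, little-endian): the
+// mask starts as {0, 1, 2, 3}, selecting only zero lanes, and becomes
+// {4, 1, 5, 3}: source lanes 0 and 1 (the second shuffle input) land in the
+// low halves, so the high half of each i32 element is zero after the bitcast.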
+
+static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
+ for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
+ for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
+ ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
+}
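+
+// For example (illustrative): for VT = v2i32 this produces the byte shuffle
+// mask {3, 2, 1, 0, 7, 6, 5, 4}, i.e. each 4-byte lane reversed in place.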
+
+SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Generate a byte wise shuffle mask for the BSWAP.
+ SmallVector<int, 16> ShuffleMask;
+ createBSWAPShuffleMask(VT, ShuffleMask);
+ EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
+
+ // Only emit a shuffle if the mask is legal.
+ if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ SDLoc DL(Op);
+ Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+}
+
+SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // If we have the scalar operation, it's probably cheaper to unroll it.
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+  // If the vector element width is a whole number of bytes, test if it's legal
+ // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
+ // vector. This greatly reduces the number of bit shifts necessary.
+ unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
+ if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
+ SmallVector<int, 16> BSWAPMask;
+ createBSWAPShuffleMask(VT, BSWAPMask);
+
+ EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
+ if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
+ (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
+ (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
+ SDLoc DL(Op);
+ Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
+ BSWAPMask);
+ Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+ }
+
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Let LegalizeDAG handle this later.
+ return Op;
+}
+
+SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
+ // Implement VSELECT in terms of XOR, AND, OR
+ // on platforms which do not support blend natively.
+ SDLoc DL(Op);
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ EVT VT = Mask.getValueType();
+
+ // If we can't even use the basic vector operations of
+  // AND, OR, XOR, we will have to scalarize the op.
+  // Note that the operation may be 'promoted', which means that it is
+  // 'bitcast' to another type which is handled.
+  // This operation also isn't safe with AND, OR, XOR when the boolean type
+  // is 0/1 as we need an all-ones vector constant to mask with.
+  // FIXME: Sign extend 1 to all ones if that's legal on the target.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getBooleanContents(Op1.getValueType()) !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // If the mask and the type are different sizes, unroll the vector op. This
+ // can occur when getSetCCResultType returns something that is different in
+ // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
+ if (VT.getSizeInBits() != Op1.getValueSizeInBits())
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
+SDValue VectorLegalizer::ExpandABS(SDValue Op) {
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandABS(Op.getNode(), Result, DAG))
+ return Result;
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
+ // Attempt to expand using TargetLowering.
+ SDValue Result, Chain;
+ if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) {
+ if (Op.getNode()->isStrictFPOpcode())
+ // Relink the chain
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
+ return Result;
+ }
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDLoc DL(Op);
+
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG))
+ return Result;
+
+ // Make sure that the SINT_TO_FP and SRL instructions are available.
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ unsigned BW = VT.getScalarSizeInBits();
+ assert((BW == 64 || BW == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+ SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
+
+ // Constants to clear the upper part of the word.
+ // Notice that we can also use SHL+SHR, but using a constant is slightly
+ // faster on x86.
+ uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
+ SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
+
+ // Two to the power of half-word-size.
+ SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
+
+ // Clear upper part of LO, lower HI
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+ // Convert hi and lo to floats
+ // Convert the hi part back to the upper values
+ // TODO: Can any fast-math-flags be set on these nodes?
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+ // Add the two halves
+ return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
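+
+// Worked example of the split above (sketch, BW = 32): for x = 0x80000001,
+//   HI = x >> 16    = 0x8000
+//   LO = x & 0xFFFF = 0x0001
+//   result = sint_to_fp(HI) * 65536.0 + sint_to_fp(LO)
+// Each half is at most 2^16 - 1, hence non-negative as a signed 32-bit value,
+// so the signed conversions reconstruct the unsigned value exactly.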
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+ SDLoc DL(Op);
+ SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+ // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
+ return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
+ Zero, Op.getOperand(0));
+ }
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
+ // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
+ // we can defer this to operation legalization where it will be lowered as
+ // a+(-b).
+ EVT VT = Op.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FADD, VT))
+ return Op; // Defer to LegalizeDAG
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandCTPOP(Op.getNode(), Result, DAG))
+ return Result;
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandCTLZ(Op.getNode(), Result, DAG))
+ return Result;
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandCTTZ(Op.getNode(), Result, DAG))
+ return Result;
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandFunnelShift(Op.getNode(), Result, DAG))
+ return Result;
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandROT(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandROT(Op.getNode(), Result, DAG))
+ return Result;
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG))
+ return Expanded;
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) {
+ SDValue Result, Overflow;
+ TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG);
+
+ if (Op.getResNo() == 0) {
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+ return Result;
+ } else {
+ AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+ return Overflow;
+ }
+}
+
+SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) {
+ SDValue Result, Overflow;
+ TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG);
+
+ if (Op.getResNo() == 0) {
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+ return Result;
+ } else {
+ AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+ return Overflow;
+ }
+}
+
+SDValue VectorLegalizer::ExpandMULO(SDValue Op) {
+ SDValue Result, Overflow;
+ if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG))
+ std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode());
+
+ if (Op.getResNo() == 0) {
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+ return Result;
+ } else {
+ AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+ return Overflow;
+ }
+}
+
+SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) {
+ if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG))
+ return Expanded;
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
+ if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG))
+ return Expanded;
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumOpers = Op.getNumOperands();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT ValueVTs[] = {EltVT, MVT::Other};
+ SDValue Chain = Op.getOperand(0);
+ SDLoc dl(Op);
+
+ SmallVector<SDValue, 32> OpValues;
+ SmallVector<SDValue, 32> OpChains;
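+ // Unroll the operation one element at a time. Each scalar operation gets
+ // its own chain result; the chains are rejoined with a TokenFactor below.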
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SmallVector<SDValue, 4> Opers;
+ SDValue Idx = DAG.getConstant(i, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+
+ // The Chain is the first operand.
+ Opers.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned j = 1; j < NumOpers; ++j) {
+ SDValue Oper = Op.getOperand(j);
+ EVT OperVT = Oper.getValueType();
+
+ if (OperVT.isVector())
+ Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperVT.getVectorElementType(), Oper, Idx);
+
+ Opers.push_back(Oper);
+ }
+
+ SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
+
+ OpValues.push_back(ScalarOp.getValue(0));
+ OpChains.push_back(ScalarOp.getValue(1));
+ }
+
+ SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+
+ AddLegalizedOperand(Op.getValue(0), Result);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return Op.getResNo() ? NewChain : Result;
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ SDLoc dl(Op);
+ SmallVector<SDValue, 8> Ops(NumElems);
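+ // Compare one lane at a time: each scalar SETCC selects between all-ones
+ // and zero, reproducing the vector-boolean encoding per element.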
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue LHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue RHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i] = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TmpEltVT),
+ LHSElem, RHSElem, CC);
+ Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
+                        DAG.getConstant(APInt::getAllOnesValue(
+                            EltVT.getSizeInBits()), dl, EltVT),
+                        DAG.getConstant(0, dl, EltVT));
+ }
+ return DAG.getBuildVector(VT, dl, Ops);
+}
+
+bool SelectionDAG::LegalizeVectors() {
+ return VectorLegalizer(*this).Run();
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 0000000000000..3763e886cef29
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,5122 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an illegal vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to scalarize the result of this "
+ "operator!\n");
+
+ case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
+ case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
+ case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
+ case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ R = ScalarizeVecRes_VecInregOp(N);
+ break;
+ case ISD::ABS:
+ case ISD::ANY_EXTEND:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ case ISD::FCANONICALIZE:
+ R = ScalarizeVecRes_UnaryOp(N);
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ R = ScalarizeVecRes_BinOp(N);
+ break;
+ case ISD::FMA:
+ R = ScalarizeVecRes_TernaryOp(N);
+ break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_EXTEND:
+ R = ScalarizeVecRes_StrictFPOp(N);
+ break;
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ R = ScalarizeVecRes_OverflowOp(N, ResNo);
+ break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ R = ScalarizeVecRes_MULFIX(N);
+ break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ Op0.getValueType(), Op0, Op1, Op2);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
+ Op2);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
+ EVT VT = N->getValueType(0).getVectorElementType();
+ unsigned NumOpers = N->getNumOperands();
+ SDValue Chain = N->getOperand(0);
+ EVT ValueVTs[] = {VT, MVT::Other};
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 4> Opers;
+
+ // The Chain is the first operand.
+ Opers.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOpers; ++i) {
+ SDValue Oper = N->getOperand(i);
+
+ if (Oper.getValueType().isVector())
+ Oper = GetScalarizedVector(Oper);
+
+ Opers.push_back(Oper);
+ }
+
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers);
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N,
+ unsigned ResNo) {
+ SDLoc DL(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+
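+ // If the result vector is being scalarized its operands already have been;
+ // otherwise extract their single element by hand.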
+ SDValue ScalarLHS, ScalarRHS;
+ if (getTypeAction(ResVT) == TargetLowering::TypeScalarizeVector) {
+ ScalarLHS = GetScalarizedVector(N->getOperand(0));
+ ScalarRHS = GetScalarizedVector(N->getOperand(1));
+ } else {
+ SmallVector<SDValue, 1> ElemsLHS, ElemsRHS;
+ DAG.ExtractVectorElements(N->getOperand(0), ElemsLHS);
+ DAG.ExtractVectorElements(N->getOperand(1), ElemsRHS);
+ ScalarLHS = ElemsLHS[0];
+ ScalarRHS = ElemsRHS[0];
+ }
+
+ SDVTList ScalarVTs = DAG.getVTList(
+ ResVT.getVectorElementType(), OvVT.getVectorElementType());
+ SDNode *ScalarNode = DAG.getNode(
+ N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode();
+
+ // Replace the other vector result not being explicitly scalarized here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
+ SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(
+ ISD::SCALAR_TO_VECTOR, DL, OtherVT, SDValue(ScalarNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(ScalarNode, ResNo);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetScalarizedVector(Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().isVector() &&
+     Op.getValueType().getVectorNumElements() == 1 &&
+     !isSimpleLegalType(Op.getValueType()))
+ Op = GetScalarizedVector(Op);
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(ISD::BITCAST, SDLoc(N),
+ NewVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ // The BUILD_VECTOR operands may be of wider element types and
+ // we may need to truncate them back to the requested return type.
+ if (EltVT.isInteger())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ N->getValueType(0).getVectorElementType(),
+ N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ NewVT, Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ { NewVT, MVT::Other },
+ { N->getOperand(0), Op, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FPOWI, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ // The value to insert may have a wider type than the vector element type,
+ // so be sure to truncate it to the element type if necessary.
+ SDValue Op = N->getOperand(1);
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ if (Op.getValueType() != EltVT)
+ // FIXME: Can this happen for floating point types?
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op);
+ return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+ assert(N->isUnindexed() && "Indexed vector load?");
+
+ SDValue Result = DAG.getLoad(
+ ISD::UNINDEXED, N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
+ N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
+ N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
+ N->getAAInfo());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+ // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+ EVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ SDLoc DL(N);
+ // The result needs scalarizing, but it's not a given that the source does.
+ // This is a workaround for targets where it's impossible to scalarize the
+ // result of a conversion, because the source type is legal.
+ // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
+ // are widened to v8i8, v4i16, and v2i32, which are legal, because the
+ // source type v1i64 is legal and was not scalarized.
+ // See the similar logic in ScalarizeVecRes_SETCC
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT,
+ LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+
+ EVT OpVT = Op.getValueType();
+ EVT OpEltVT = OpVT.getVectorElementType();
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ switch (N->getOpcode()) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
+ }
+
+ llvm_unreachable("Illegal extend_vector_inreg opcode");
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ EVT OpVT = Cond.getValueType();
+ SDLoc DL(N);
+ // The vselect result and true/false operands need scalarizing, but it's
+ // not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
+ // See the similar logic in ScalarizeVecRes_SETCC
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Cond = GetScalarizedVector(Cond);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Cond = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ TargetLowering::BooleanContent ScalarBool =
+ TLI.getBooleanContents(false, false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
+
+ // If integer and float booleans have different contents then we can't
+ // reliably optimize in all cases. There is a full explanation for this in
+ // DAGCombiner::visitSELECT() where the same issue affects folding
+ // (select C, 0, 1) to (xor C, 1).
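+ // For example, a vector "true" of all ones (-1) read where a scalar
+ // ZeroOrOne boolean is expected would be misinterpreted, so the value is
+ // adjusted in the switch below.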
+ if (TLI.getBooleanContents(false, false) !=
+ TLI.getBooleanContents(false, true)) {
+ // At least try the common case where the boolean is generated by a
+ // comparison.
+ if (Cond->getOpcode() == ISD::SETCC) {
+ EVT OpVT = Cond->getOperand(0).getValueType();
+ ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
+ VecBool = TLI.getBooleanContents(OpVT);
+ } else
+ ScalarBool = TargetLowering::UndefinedBooleanContent;
+ }
+
+ EVT CondVT = Cond.getValueType();
+ if (ScalarBool != VecBool) {
+ switch (ScalarBool) {
+ case TargetLowering::UndefinedBooleanContent:
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
+ // Vector read from all ones, scalar expects a single 1 so mask.
+ Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT,
+ Cond, DAG.getConstant(1, SDLoc(N), CondVT));
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrOneBooleanContent);
+ // Vector reads from a one, scalar from all ones so sign extend.
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT,
+ Cond, DAG.getValueType(MVT::i1));
+ break;
+ }
+ }
+
+ // Truncate the condition if needed
+ auto BoolVT = getSetCCResultType(CondVT);
+ if (BoolVT.bitsLT(CondVT))
+ Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond);
+
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), Cond, LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), N->getOperand(0), LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(),
+ N->getOperand(0), N->getOperand(1),
+ LHS, GetScalarizedVector(N->getOperand(3)),
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+ // Figure out if the scalar is the LHS or RHS and return it.
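+ // The single mask element selects the LHS when it is 0, the RHS when it is
+ // 1, and yields UNDEF when it is undef.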
+ SDValue Arg = N->getOperand(2).getOperand(0);
+ if (Arg.isUndef())
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT OpVT = LHS.getValueType();
+ EVT NVT = N->getValueType(0).getVectorElementType();
+ SDLoc DL(N);
+
+ // The result needs scalarizing, but it's not a given that the source does.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ LHS = GetScalarizedVector(LHS);
+ RHS = GetScalarizedVector(RHS);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ LHS = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ RHS = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+ // Vectors may have different boolean contents from scalars. Promote the
+ // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, NVT, Res);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (!Res.getNode()) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to scalarize this operator's "
+ "operand!\n");
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Res = ScalarizeVecOp_UnaryOp(N);
+ break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
+ break;
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
+ case ISD::VSELECT:
+ Res = ScalarizeVecOp_VSELECT(N);
+ break;
+ case ISD::SETCC:
+ Res = ScalarizeVecOp_VSETCC(N);
+ break;
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::STRICT_FP_ROUND:
+ Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
+ break;
+ case ISD::FP_ROUND:
+ Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
+ break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = ScalarizeVecOp_VECREDUCE(N);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// If the value to convert is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, SDLoc(N),
+ N->getValueType(0), Elt);
+}
+
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the operation on the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
+ N->getValueType(0).getScalarType(), Elt);
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
+}
+
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the strict FP operation on the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
+ { N->getValueType(0).getScalarType(), MVT::Other },
+ { N->getOperand(0), Elt });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
+/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands());
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ Ops[i] = GetScalarizedVector(N->getOperand(i));
+ return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
+}
+
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
+/// so just return the element, ignoring the index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ if (Res.getValueType() != VT)
+ Res = VT.isFloatingPoint()
+ ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
+ : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
+ return Res;
+}
+
+/// If the input condition is a vector that needs to be scalarized, it must be
+/// <1 x i1>, so just convert to a normal ISD::SELECT
+/// (still with vector output type since that was acceptable if we got here).
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
+ SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
+ EVT VT = N->getValueType(0);
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1),
+ N->getOperand(2));
+}
+
+/// If the operand is a vector that needs to be scalarized then the
+/// result must be v1i1, so just convert it to a scalar SETCC and wrap it
+/// in a SCALAR_TO_VECTOR, since the result type is legal if we got here.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
+
+ EVT VT = N->getValueType(0);
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ EVT NVT = VT.getVectorElementType();
+ SDLoc DL(N);
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+
+ // Vectors may have different boolean contents from scalars. Promote the
+ // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+
+ Res = DAG.getNode(ExtendCode, DL, NVT, Res);
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
+}
+
+/// If the value to store is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ SDLoc dl(N);
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(
+ N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(), N->getAlignment(),
+ N->getMemOperand()->getFlags(), N->getAAInfo());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
+ N->getAAInfo());
+}
+
+/// If the value to round is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ N->getValueType(0).getVectorElementType(), Elt,
+ N->getOperand(1));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Wrong operand for scalarization!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ { N->getValueType(0).getVectorElementType(),
+ MVT::Other },
+ { N->getOperand(0), Elt, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ // Result type may be wider than element type.
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res);
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// This method is called when the specified result of the specified node is
+/// found to need vector splitting. At this point, the node may also have
+/// invalid operands or may have other results that need legalization; we just
+/// know that (at least) one result needs vector splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split the result of this "
+ "operator!\n");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::LOAD:
+ SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::MLOAD:
+ SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::MGATHER:
+ SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi);
+ break;
+ case ISD::SETCC:
+ SplitVecRes_SETCC(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+ break;
+ case ISD::VAARG:
+ SplitVecRes_VAARG(N, Lo, Hi);
+ break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
+ break;
+
+ case ISD::ABS:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::FCANONICALIZE:
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ SplitVecRes_ExtendOp(N, Lo, Hi);
+ break;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::FPOW:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ SplitVecRes_BinOp(N, Lo, Hi);
+ break;
+ case ISD::FMA:
+ SplitVecRes_TernaryOp(N, Lo, Hi);
+ break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ SplitVecRes_StrictFPOp(N, Lo, Hi);
+ break;
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ SplitVecRes_OverflowOp(N, ResNo, Lo, Hi);
+ break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ SplitVecRes_MULFIX(N, Lo, Hi);
+ break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ SDLoc dl(N);
+
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+}
+
+void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Op0Lo, Op0Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ SDValue Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ SDValue Op2Lo, Op2Hi;
+ GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
+ SDLoc dl(N);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
+ Op0Lo, Op1Lo, Op2Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
+ Op0Hi, Op1Hi, Op2Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ SDLoc dl(N);
+ SDValue Op2 = N->getOperand(2);
+
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeWidenVector:
+ break;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+ break;
+ case TargetLowering::TypeSplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+ EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ unsigned LoNumElts = LoVT.getVectorNumElements();
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+ Lo = DAG.getBuildVector(LoVT, dl, LoOps);
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+ Hi = DAG.getBuildVector(HiVT, dl, HiOps);
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+ SDLoc dl(N);
+ unsigned NumSubvectors = N->getNumOperands() / 2;
+ if (NumSubvectors == 1) {
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+ return;
+ }
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps);
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps);
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ SDLoc dl(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ GetSplitVector(Vec, Lo, Hi);
+
+ EVT VecVT = Vec.getValueType();
+ unsigned VecElems = VecVT.getVectorNumElements();
+ unsigned SubElems = SubVec.getValueType().getVectorNumElements();
+
+ // If we know the index is 0, and we know the subvector doesn't cross the
+ // boundary between the halves, we can avoid spilling the vector, and insert
+ // into the lower half of the split vector directly.
+ // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever
+ // the index is constant and there is no boundary crossing. But those cases
+ // don't seem to get hit in practice.
+ if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = ConstIdx->getZExtValue();
+ if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
+ return;
+ }
+ }
+
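+ // In the general case, merge the subvector in memory: spill the whole
+ // vector, store the subvector at the selected offset, and reload the two
+ // halves.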
+ // Spill the vector to the stack.
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+
+ // Store the new subvector into the specified index.
+ SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
+ Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());
+
+ // Load the Lo part from the stack slot.
+ Lo =
+ DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
+ StackPtr =
+ DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(IncrementSize, dl, StackPtr.getValueType()));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ MinAlign(Alignment, IncrementSize));
+}
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDLoc DL(N);
+
+ SDValue RHSLo, RHSHi;
+ SDValue RHS = N->getOperand(1);
+ EVT RHSVT = RHS.getValueType();
+ if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(RHS, RHSLo, RHSHi);
+ else
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
+
+ Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDLoc dl(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) =
+ DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+ DAG.getValueType(LoVT));
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+ DAG.getValueType(HiVT));
+}
+
+void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+
+ SDLoc dl(N);
+ SDValue InLo, InHi;
+
+ if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N0, InLo, InHi);
+ else
+ std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0);
+
+ EVT InLoVT = InLo.getValueType();
+ unsigned InNumElements = InLoVT.getVectorNumElements();
+
+ EVT OutLoVT, OutHiVT;
+ std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ unsigned OutNumElements = OutLoVT.getVectorNumElements();
+ assert((2 * OutNumElements) <= InNumElements &&
+ "Illegal extend vector in reg split");
+
+ // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the
+ // input vector (i.e. we only use InLo):
+ // OutLo will extend the first OutNumElements from InLo.
+ // OutHi will extend the next OutNumElements from InLo.
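+ // For example, when a zero_extend_vector_inreg producing v4i64 from v8i32
+ // is split, InLo is v4i32: OutLo extends its elements 0 and 1, and OutHi
+ // extends elements 2 and 3 (moved to the bottom by the shuffle below).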
+
+ // Shuffle the elements from InLo for OutHi into the bottom elements to
+ // create a 'fake' InHi.
+ SmallVector<int, 8> SplitHi(InNumElements, -1);
+ for (unsigned i = 0; i != OutNumElements; ++i)
+ SplitHi[i] = i + OutNumElements;
+ InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi);
+
+ Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo);
+ Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ unsigned NumOps = N->getNumOperands();
+ SDValue Chain = N->getOperand(0);
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ SmallVector<SDValue, 4> OpsLo;
+ SmallVector<SDValue, 4> OpsHi;
+
+ // The Chain is the first operand.
+ OpsLo.push_back(Chain);
+ OpsHi.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOps; ++i) {
+ SDValue Op = N->getOperand(i);
+ SDValue OpLo = Op;
+ SDValue OpHi = Op;
+
+ EVT InVT = Op.getValueType();
+ if (InVT.isVector()) {
+ // If the input also splits, handle it directly for a
+ // compile time speedup. Otherwise split it by hand.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Op, OpLo, OpHi);
+ else
+ std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i);
+ }
+
+ OpsLo.push_back(OpLo);
+ OpsHi.push_back(OpHi);
+ }
+
+ EVT LoValueVTs[] = {LoVT, MVT::Other};
+ EVT HiValueVTs[] = {HiVT, MVT::Other};
+ Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi);
+
+ // Build a factor node to remember that this Op is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
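+ // If NE < ResNE, the remaining result lanes are filled with UNDEF after
+ // the loop.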
+
+ // The value types of each unrolled operation, including the chain.
+ EVT ChainVTs[] = {EltVT, MVT::Other};
+ SmallVector<SDValue, 8> Chains;
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ Operands[0] = Chain;
+ for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(
+ DAG.getDataLayout())));
+ } else {
+ Operands[j] = Operand;
+ }
+ }
+ SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands);
+ Scalar.getNode()->setFlags(N->getFlags());
+
+ // Add the scalar result and its chain value to the result vectors.
+ Scalars.push_back(Scalar);
+ Chains.push_back(Scalar.getValue(1));
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(DAG.getUNDEF(EltVT));
+
+ // Build a new factor node to connect the chain back together.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ // Create a new BUILD_VECTOR node
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
+ return DAG.getBuildVector(VecVT, dl, Scalars);
+}
+
+void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT LoResVT, HiResVT, LoOvVT, HiOvVT;
+ std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT);
+ std::tie(LoOvVT, HiOvVT) = DAG.GetSplitDestVTs(OvVT);
+
+ SDValue LoLHS, HiLHS, LoRHS, HiRHS;
+ if (getTypeAction(ResVT) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(N->getOperand(0), LoLHS, HiLHS);
+ GetSplitVector(N->getOperand(1), LoRHS, HiRHS);
+ } else {
+ std::tie(LoLHS, HiLHS) = DAG.SplitVectorOperand(N, 0);
+ std::tie(LoRHS, HiRHS) = DAG.SplitVectorOperand(N, 1);
+ }
+
+ unsigned Opcode = N->getOpcode();
+ SDVTList LoVTs = DAG.getVTList(LoResVT, LoOvVT);
+ SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT);
+ SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode();
+ SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode();
+
+ Lo = SDValue(LoNode, ResNo);
+ Hi = SDValue(HiNode, ResNo);
+
+ // Replace the other vector result not being explicitly split here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
+ SetSplitVector(SDValue(N, OtherNo),
+ SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, OtherVT,
+ SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ GetSplitVector(Vec, Lo, Hi);
+
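+ // If the index is constant, insert directly into whichever half contains
+ // that lane.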
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ if (IdxVal < LoNumElts)
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ else
+ Hi =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getConstant(IdxVal - LoNumElts, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return;
+ }
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return;
+
+ // Make the vector elements byte-addressable if they aren't already.
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ if (VecVT.getScalarSizeInBits() < 8) {
+ EltVT = MVT::i8;
+ VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ VecVT.getVectorNumElements());
+ Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
+ // Extend the element type to match if needed.
+ if (EltVT.bitsGT(Elt.getValueType()))
+ Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt);
+ }
+
+ // Spill the vector to the stack.
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
+ SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr,
+ MachinePointerInfo::getUnknownStack(MF), EltVT);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = LoVT.getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(IncrementSize, dl,
+ StackPtr.getValueType()));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(HiVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize),
+ MinAlign(Alignment, IncrementSize));
+
+ // If we adjusted the original type, we need to truncate the results.
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ if (LoVT != Lo.getValueType())
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo);
+ if (HiVT != Hi.getValueType())
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+ Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+ EVT LoVT, HiVT;
+ SDLoc dl(LD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ EVT MemoryVT = LD->getMemoryVT();
+ unsigned Alignment = LD->getOriginalAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
+ Alignment, MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
+ SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(MLD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+ SDValue Ch = MLD->getChain();
+ SDValue Ptr = MLD->getBasePtr();
+ SDValue Mask = MLD->getMask();
+ SDValue PassThru = MLD->getPassThru();
+ unsigned Alignment = MLD->getOriginalAlignment();
+ ISD::LoadExtType ExtType = MLD->getExtensionType();
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
+
+ EVT MemoryVT = MLD->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
+ else
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO,
+ ExtType, MLD->isExpandingLoad());
+
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
+ MLD->isExpandingLoad());
+ unsigned HiOffset = LoMemVT.getStoreSize();
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
+ HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(),
+ MLD->getRanges());
+
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO,
+ ExtType, MLD->isExpandingLoad());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MLD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
+ SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(MGT);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
+
+ SDValue Ch = MGT->getChain();
+ SDValue Ptr = MGT->getBasePtr();
+ SDValue Mask = MGT->getMask();
+ SDValue PassThru = MGT->getPassThru();
+ SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
+ unsigned Alignment = MGT->getOriginalAlignment();
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
+
+ EVT MemoryVT = MGT->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ // Split MemoryVT
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
+ else
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
+
+ SDValue IndexHi, IndexLo;
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(), MGT->getRanges());
+
+ SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
+ MMO, MGT->getIndexType());
+
+ SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
+ MMO, MGT->getIndexType());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MGT, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
+ EVT LoVT, HiVT;
+ SDLoc DL(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // If the input also splits, handle it directly. Otherwise split it by hand.
+ SDValue LL, LH, RL, RH;
+ if (getTypeAction(N->getOperand(0).getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), LL, LH);
+ else
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+
+ if (getTypeAction(N->getOperand(1).getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(1), RL, RH);
+ else
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Get the dest types - they may not match the input types, e.g. int_to_fp.
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ EVT InVT = N->getOperand(OpNo).getValueType();
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(OpNo), Lo, Hi);
+ else
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
+
+ if (N->getOpcode() == ISD::FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
+ { N->getOperand(0), Lo, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
+ { N->getOperand(0), Hi, N->getOperand(2) });
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
+ { N->getOperand(0), Lo });
+ Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
+ { N->getOperand(0), Hi });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DestVT = N->getValueType(0);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
+
+ // We can do better than a generic split operation if the extend is doing
+ // more than just doubling the width of the elements and the following are
+ // true:
+ // - The number of vector elements is even,
+ // - the source type is legal,
+ // - the type of a split source is illegal,
+ // - the type of an extended (by doubling element size) source is legal, and
+ // - the type of that extended source when split is legal.
+ //
+ // This won't necessarily completely legalize the operation, but it will
+ // more effectively move in the right direction and prevent falling down
+ // to scalarization in many cases due to the input vector being split too
+ // far.
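+ //
+ // As an illustration (the types here are hypothetical; legality is
+ // target-dependent): suppose we are extending v16i8 to v16i32, where
+ // v16i8 and v16i16 are legal but v8i8 is not. Splitting the source
+ // directly would produce two illegal v8i8 halves, so instead we do:
+ //   v16i16 = any_extend v16i8     ; one-step extend of the whole source
+ //   v8i16, v8i16 = split v16i16   ; both halves are now legal
+ //   Lo/Hi = any_extend v8i16      ; extend each half the rest of the way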
+ unsigned NumElements = SrcVT.getVectorNumElements();
+ if ((NumElements & 1) == 0 &&
+ SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
+ EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx);
+
+ EVT SplitLoVT, SplitHiVT;
+ std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
+ if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
+ TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
+ LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:";
+ N->dump(&DAG); dbgs() << "\n");
+ // Extend the source vector by one step.
+ SDValue NewSrc =
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+ // Get the low and high halves of the new, extended one step, vector.
+ std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+ // Extend those vector halves the rest of the way.
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ return;
+ }
+ }
+ // Fall back to the generic unary operator splitting otherwise.
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ EVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
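+ // For example (illustrative): when splitting a v8i32 shuffle, each half
+ // draws from four v4i32 inputs. If the low half of the mask is
+ // <0, 9, 2, 11>, it only touches Inputs[0] (elements 0-3) and Inputs[2]
+ // (elements 8-11), so Lo becomes a v4i32 shuffle of those two inputs with
+ // mask <0, 5, 2, 7>; a half that draws from three or more inputs falls
+ // back to BUILD_VECTOR.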
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool useBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ useBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (useBuildVector) {
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input],
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ }
+
+ // Construct the Lo/Hi output using a BUILD_VECTOR.
+ Output = DAG.getBuildVector(NewVT, dl, SVOps);
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = DAG.getUNDEF(NewVT);
+ } else {
+ SDValue Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = InputUsed[1] == -1U ?
+ DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops);
+ }
+
+ Ops.clear();
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = OVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ SDValue SV = N->getOperand(2);
+ SDLoc dl(N);
+
+ const unsigned Alignment = DAG.getDataLayout().getABITypeAlignment(
+ NVT.getTypeForEVT(*DAG.getContext()));
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment);
+ Chain = Hi.getValue(1);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// This method is called when the specified operand of the specified node is
+/// found to need vector splitting. At this point, all of the result types of
+/// the node are known to be legal, but other operands of the node may need
+/// legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom split this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ if (!Res.getNode()) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
+ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::TRUNCATE:
+ Res = SplitVecOp_TruncateHelper(N);
+ break;
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSTORE:
+ Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSCATTER:
+ Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
+ break;
+ case ISD::MGATHER:
+ Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
+ break;
+ case ISD::VSELECT:
+ Res = SplitVecOp_VSELECT(N, OpNo);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
+ Res = SplitVecOp_TruncateHelper(N);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::FTRUNC:
+ case ISD::FCANONICALIZE:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = SplitVecOp_ExtVecInRegOp(N);
+ break;
+
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = SplitVecOp_VECREDUCE(N, OpNo);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ if (N->isStrictFPOpcode())
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
+ "Invalid operand expansion");
+ else
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
+ // The only possibility for an illegal operand is the mask, since result type
+ // legalization would have handled this node already otherwise.
+ assert(OpNo == 0 && "Illegal operand must be mask");
+
+ SDValue Mask = N->getOperand(0);
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ EVT Src0VT = Src0.getValueType();
+ SDLoc DL(N);
+ assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
+
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ assert(Lo.getValueType() == Hi.getValueType() &&
+ "Lo and Hi have differing types");
+
+ EVT LoOpVT, HiOpVT;
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
+ assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
+
+ SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
+ std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
+ std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
+ std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+
+ SDValue LoSelect =
+ DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
+ SDValue HiSelect =
+ DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+
+ SDValue VecOp = N->getOperand(OpNo);
+ EVT VecVT = VecOp.getValueType();
+ assert(VecVT.isVector() && "Can only split reduce vector operand");
+ GetSplitVector(VecOp, Lo, Hi);
+ EVT LoOpVT, HiOpVT;
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
+
+ bool NoNaN = N->getFlags().hasNoNaNs();
+ unsigned CombineOpc = 0;
+ switch (N->getOpcode()) {
+ case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break;
+ case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break;
+ case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break;
+ case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break;
+ case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break;
+ case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break;
+ case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break;
+ case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break;
+ case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break;
+ case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
+ case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
+ case ISD::VECREDUCE_FMAX:
+ CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
+ break;
+ case ISD::VECREDUCE_FMIN:
+ CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
+ break;
+ default:
+ llvm_unreachable("Unexpected reduce ISD node");
+ }
+
+ // Combine the split subvectors with the corresponding binary operation,
+ // then reduce the resulting partially reduced, half-sized vector.
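+ // For example (illustrative): a VECREDUCE_ADD of an illegal v8i32 becomes
+ //   v4i32 Partial = ADD v4i32 Lo, v4i32 Hi
+ //   i32 Result    = VECREDUCE_ADD v4i32 Partial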
+ SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags());
+ return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
+ { N->getOperand(0), Lo });
+ Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
+ { N->getOperand(0), Hi });
+
+ // Build a factor node to remember that this operation is independent
+ // of the other one.
+ SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+ // For example, i64 = BITCAST v4i16 on Alpha. Typically the vector will
+ // end up being split all the way down to individual components. Convert the
+ // split pieces into integers and reassemble.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
+ JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ // We know that the extracted result type is legal.
+ EVT SubVT = N->getValueType(0);
+ SDValue Idx = N->getOperand(1);
+ SDLoc dl(N);
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ if (IdxVal < LoElts) {
+ assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+ "Extracted subvector crosses vector split!");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ } else {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, dl,
+ Idx.getValueType()));
+ }
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec.getValueType();
+
+ if (isa<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+ if (IdxVal < LoElts)
+ return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
+ DAG.getConstant(IdxVal - LoElts, SDLoc(N),
+ Idx.getValueType())), 0);
+ }
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return SDValue();
+
+ // Make the vector elements byte-addressable if they aren't already.
+ SDLoc dl(N);
+ EVT EltVT = VecVT.getVectorElementType();
+ if (VecVT.getScalarSizeInBits() < 8) {
+ EltVT = MVT::i8;
+ VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ VecVT.getVectorNumElements());
+ Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
+ }
+
+ // Store the vector to the stack.
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+
+ // Load back the required element.
+ StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+
+ // FIXME: This is to handle i1 vectors with elements promoted to i8.
+ // i1 vector handling needs general improvement.
+ if (N->getValueType(0).bitsLT(EltVT)) {
+ SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0));
+ }
+
+ return DAG.getExtLoad(
+ ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
+ SDValue Lo, Hi;
+
+ // *_EXTEND_VECTOR_INREG only reference the lower half of the input, so
+ // splitting the result has the same effect as splitting the input operand.
+ SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
+ unsigned OpNo) {
+ EVT LoVT, HiVT;
+ SDLoc dl(MGT);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
+
+ SDValue Ch = MGT->getChain();
+ SDValue Ptr = MGT->getBasePtr();
+ SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
+ SDValue Mask = MGT->getMask();
+ SDValue PassThru = MGT->getPassThru();
+ unsigned Alignment = MGT->getOriginalAlignment();
+
+ SDValue MaskLo, MaskHi;
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+
+ EVT MemoryVT = MGT->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
+ else
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
+
+ SDValue IndexHi, IndexLo;
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(), MGT->getRanges());
+
+ SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
+ SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
+ OpsLo, MMO, MGT->getIndexType());
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(),
+ MGT->getRanges());
+
+ SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
+ SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
+ OpsHi, MMO, MGT->getIndexType());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MGT, 1), Ch);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo,
+ Hi);
+ ReplaceValueWith(SDValue(MGT, 0), Res);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ SDValue Mask = N->getMask();
+ SDValue Data = N->getValue();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ SDLoc DL(N);
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
+
+ SDValue Lo, Hi;
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
+
+ Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+ N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ N->isCompressingStore());
+ unsigned HiOffset = LoMemVT.getStoreSize();
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
+ HiMemVT.getStoreSize(), Alignment, N->getAAInfo(),
+ N->getRanges());
+
+ Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+ N->isTruncatingStore(), N->isCompressingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
+ unsigned OpNo) {
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ SDValue Mask = N->getMask();
+ SDValue Index = N->getIndex();
+ SDValue Scale = N->getScale();
+ SDValue Data = N->getValue();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ SDLoc DL(N);
+
+ // Split all operands
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
+
+ SDValue IndexHi, IndexLo;
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+
+ SDValue Lo;
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
+
+ SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
+ Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
+ DL, OpsLo, MMO, N->getIndexType());
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
+
+ // The order of the scatter operations after splitting is well defined: the
+ // "Hi" part comes after the "Lo" part, so the two operations are chained
+ // one after another, with Lo serving as the chain operand of the Hi scatter.
+ SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO, N->getIndexType());
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ SDLoc DL(N);
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = N->getAAInfo();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ // Scalarize if the split halves are not byte-sized.
+ if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized())
+ return TLI.scalarizeVectorStore(N, DAG);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
+ Alignment, MMOFlags, AAInfo);
+ else
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
+ AAInfo);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, Alignment, MMOFlags, AAInfo);
+ else
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ Alignment, MMOFlags, AAInfo);
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ SDLoc DL(N);
+
+ // The input operands all must have the same type, and we know the result
+ // type is valid. Convert this to a buildvector which extracts all the
+ // input elements.
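+ // For example (illustrative):
+ //   v4i32 concat_vectors (v2i32 A, v2i32 B)
+ // becomes
+ //   v4i32 build_vector (A[0], A[1], B[0], B[1])
+ // where each element is extracted with EXTRACT_VECTOR_ELT.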
+ // TODO: If the input elements are power-two vectors, we could convert this to
+ // a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (const SDValue &Op : N->op_values()) {
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ }
+ }
+
+ return DAG.getBuildVector(N->getValueType(0), DL, Elts);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
+ // The result type is legal, but the input type is illegal. If splitting
+ // ends up with the result type of each half still being legal, just
+ // do that. If, however, that would result in an illegal result type,
+ // we can try to get more clever with power-two vectors. Specifically,
+ // split the input type, but also widen the result element size, then
+ // concatenate the halves and truncate again. For example, consider a target
+ // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
+ // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
+ // %inlo = v4i32 extract_subvector %in, 0
+ // %inhi = v4i32 extract_subvector %in, 4
+ // %lo16 = v4i16 trunc v4i32 %inlo
+ // %hi16 = v4i16 trunc v4i32 %inhi
+ // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
+ // %res = v8i8 trunc v8i16 %in16
+ //
+ // Without this transform, the original truncate would end up being
+ // scalarized, which is pretty much always a last resort.
+ SDValue InVec = N->getOperand(0);
+ EVT InVT = InVec->getValueType(0);
+ EVT OutVT = N->getValueType(0);
+ unsigned NumElements = OutVT.getVectorNumElements();
+ bool IsFloat = OutVT.isFloatingPoint();
+
+ // Widening should have already made sure this is a power-of-two vector
+ // if we're trying to split it at all; assert that here, just in case.
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+
+ unsigned InElementSize = InVT.getScalarSizeInBits();
+ unsigned OutElementSize = OutVT.getScalarSizeInBits();
+
+ // Determine the split output VT. If it's legal, we can just split directly.
+ EVT LoOutVT, HiOutVT;
+ std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
+ assert(LoOutVT == HiOutVT && "Unequal split?");
+
+ // If the input elements are only 1/2 the width of the result elements,
+ // just use the normal splitting. Our trick only works if there's room
+ // to split more than once.
+ if (isTypeLegal(LoOutVT) ||
+ InElementSize <= OutElementSize * 2)
+ return SplitVecOp_UnaryOp(N);
+ SDLoc DL(N);
+
+ // Don't touch if this will be scalarized.
+ EVT FinalVT = InVT;
+ while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+ FinalVT = FinalVT.getHalfNumVectorElementsVT(*DAG.getContext());
+
+ if (getTypeAction(FinalVT) == TargetLowering::TypeScalarizeVector)
+ return SplitVecOp_UnaryOp(N);
+
+ // Get the split input vector.
+ SDValue InLoVec, InHiVec;
+ GetSplitVector(InVec, InLoVec, InHiVec);
+
+ // Truncate them to 1/2 the element size.
+ EVT HalfElementVT = IsFloat ?
+ EVT::getFloatingPointVT(InElementSize/2) :
+ EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
+ NumElements/2);
+ SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+ SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
+ // Concatenate them to get the full intermediate truncation result.
+ EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
+ SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
+ HalfHi);
+ // Now finish up by truncating all the way down to the original result
+ // type. This should normally be something that ends up being legal directly,
+ // but in theory if a target has very wide vectors and an annoyingly
+ // restricted set of legal types, this split can chain to build things up.
+ return IsFloat
+ ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
+ DAG.getTargetConstant(
+ 0, DL, TLI.getPointerTy(DAG.getDataLayout())))
+ : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ // The result has a legal vector type, but the input needs splitting.
+ SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
+ SDLoc DL(N);
+ GetSplitVector(N->getOperand(0), Lo0, Hi0);
+ GetSplitVector(N->getOperand(1), Lo1, Hi1);
+ unsigned PartElements = Lo0.getValueType().getVectorNumElements();
+ EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
+ EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+
+ LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
+ HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc DL(N);
+ GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
+ { N->getOperand(0), Lo, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
+ { N->getOperand(0), Hi, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else {
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and the first input) has a legal vector type, but the second
+ // input needs splitting.
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+
+ // See if the target wants to custom widen this node.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+ return;
+
+ SDValue Res = SDValue();
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen the result of this operator!");
+
+ case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+ break;
+ case ISD::MLOAD:
+ Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
+ case ISD::MGATHER:
+ Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SUB:
+ case ISD::XOR:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ Res = WidenVecRes_Binary(N);
+ break;
+
+ case ISD::FADD:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ Res = WidenVecRes_BinaryCanTrap(N);
+ break;
+
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ // These are binary operations, but with an extra operand that shouldn't
+ // be widened (the scale).
+ Res = WidenVecRes_BinaryWithExtraScalarOp(N);
+ break;
+
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ Res = WidenVecRes_StrictFP(N);
+ break;
+
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ Res = WidenVecRes_OverflowOp(N, ResNo);
+ break;
+
+ case ISD::FCOPYSIGN:
+ Res = WidenVecRes_FCOPYSIGN(N);
+ break;
+
+ case ISD::FPOWI:
+ Res = WidenVecRes_POWI(N);
+ break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ Res = WidenVecRes_Shift(N);
+ break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = WidenVecRes_EXTEND_VECTOR_INREG(N);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecRes_Convert(N);
+ break;
+
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ Res = WidenVecRes_Convert_StrictFP(N);
+ break;
+
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC: {
+ // We're going to widen this vector op to a legal type by padding with undef
+ // elements. If the wide vector op is eventually going to be expanded to
+ // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
+ // libcalls on the undef elements.
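+ //
+ // For example (illustrative): FSIN of v3f32 widened to v4f32 gains an
+ // undef fourth lane; if the wide FSIN later expands to scalar sinf
+ // libcalls, unrolling now avoids emitting a call for that undef lane.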
+ EVT VT = N->getValueType(0);
+ EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+ TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+ Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ break;
+ }
+ }
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those like
+ // any other unary ops.
+ LLVM_FALLTHROUGH;
+
+ case ISD::ABS:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::FNEG:
+ case ISD::FCANONICALIZE:
+ Res = WidenVecRes_Unary(N);
+ break;
+ case ISD::FMA:
+ Res = WidenVecRes_Ternary(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
+ // Ternary op widening.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = GetWidenedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+ // Binary op widening.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
+ // Binary op widening, but with an extra operand that shouldn't be widened.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3,
+ N->getFlags());
+}
+
+// Given the pieces of an operation that has been broken up in order to widen
+// it, see if we can collect them together into the next widest legal VT. This
+// implementation only concatenates and pads the already-computed pieces, so it
+// is safe to use for operations that can trap.
+static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
+ SmallVectorImpl<SDValue> &ConcatOps,
+ unsigned ConcatEnd, EVT VT, EVT MaxVT,
+ EVT WidenVT) {
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ SDLoc dl(ConcatOps[0]);
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
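+ //
+ // For example (illustrative, assuming v2i32 and v4i32 are both legal): with
+ // MaxVT = v4i32, WidenVT = v8i32, and ConcatOps = [v4i32, v2i32, i32, i32],
+ // the two trailing i32 scalars are first packed into a v2i32 with
+ // INSERT_VECTOR_ELT, the two v2i32 pieces are then concatenated into a
+ // v4i32, and finally [v4i32, v4i32] is concatenated into the v8i32 result
+ // (padding with undef if fewer than NumOps pieces remain).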
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ int Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, SubConcatOps);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // Add undefs of size MaxVT until ConcatOps covers the full width of WidenVT.
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ makeArrayRef(ConcatOps.data(), NumOps));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
+ // Binary op widening for operations that can trap.
+ unsigned Opcode = N->getOpcode();
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ const SDNodeFlags Flags = N->getFlags();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+ // Operation doesn't trap so just widen as normal.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
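+ //
+ // For example (illustrative): a v6f64 FDIV widened to v8f64 on a target
+ // whose widest legal type is v2f64 is evaluated as three v2f64 FDIVs
+ // covering only the original six elements; the undef padding lanes are
+ // never divided, so no spurious trap can be introduced.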
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp2 = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2, Flags);
+ }
+ CurNumElts = 0;
+ }
+ }
+
+ return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
+ // StrictFP op widening for operations that can trap.
+ unsigned NumOpers = N->getNumOperands();
+ unsigned Opcode = N->getOpcode();
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SmallVector<SDValue, 4> InOps;
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ SmallVector<SDValue, 16> Chains;
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // The Chain is the first operand.
+ InOps.push_back(N->getOperand(0));
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOpers; ++i) {
+ SDValue Oper = N->getOperand(i);
+
+ if (Oper.getValueType().isVector()) {
+ assert(Oper.getValueType() == N->getValueType(0) &&
+ "Invalid operand type to widen!");
+ Oper = GetWidenedVector(Oper);
+ }
+
+ InOps.push_back(Oper);
+ }
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SmallVector<SDValue, 4> EOps;
+
+ for (unsigned i = 0; i < NumOpers; ++i) {
+ SDValue Op = InOps[i];
+
+ if (Op.getValueType().isVector())
+ Op = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ EOps.push_back(Op);
+ }
+
+ EVT OperVT[] = {VT, MVT::Other};
+ SDValue Oper = DAG.getNode(Opcode, dl, OperVT, EOps);
+ ConcatOps[ConcatEnd++] = Oper;
+ Chains.push_back(Oper.getValue(1));
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SmallVector<SDValue, 4> EOps;
+
+ for (unsigned i = 0; i < NumOpers; ++i) {
+ SDValue Op = InOps[i];
+
+ if (Op.getValueType().isVector())
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
+ DAG.getConstant(Idx, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ EOps.push_back(Op);
+ }
+
+ EVT WidenVT[] = {WidenEltVT, MVT::Other};
+ SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps);
+ ConcatOps[ConcatEnd++] = Oper;
+ Chains.push_back(Oper.getValue(1));
+ }
+ CurNumElts = 0;
+ }
+ }
+
+ // Build a factor node to remember all the Ops that have been created.
+ SDValue NewChain;
+ if (Chains.size() == 1)
+ NewChain = Chains[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
+}
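+
+// Illustrative example for WidenVecRes_StrictFP (assuming a STRICT_FADD on
+// v3f32 with v2f32 legal): the loop above emits one v2f32 STRICT_FADD and one
+// scalar f32 STRICT_FADD, each producing its own chain; the TokenFactor then
+// merges both chains so that no trapping operation is dropped or reordered
+// relative to the node's chain users.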
+
+SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
+ SDLoc DL(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT WideResVT, WideOvVT;
+ SDValue WideLHS, WideRHS;
+
+ // TODO: This might result in a widen/split loop.
+ if (ResNo == 0) {
+ WideResVT = TLI.getTypeToTransformTo(*DAG.getContext(), ResVT);
+ WideOvVT = EVT::getVectorVT(
+ *DAG.getContext(), OvVT.getVectorElementType(),
+ WideResVT.getVectorNumElements());
+
+ WideLHS = GetWidenedVector(N->getOperand(0));
+ WideRHS = GetWidenedVector(N->getOperand(1));
+ } else {
+ WideOvVT = TLI.getTypeToTransformTo(*DAG.getContext(), OvVT);
+ WideResVT = EVT::getVectorVT(
+ *DAG.getContext(), ResVT.getVectorElementType(),
+ WideOvVT.getVectorNumElements());
+
+ SDValue Zero = DAG.getConstant(
+ 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ WideLHS = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT),
+ N->getOperand(0), Zero);
+ WideRHS = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT),
+ N->getOperand(1), Zero);
+ }
+
+ SDVTList WideVTs = DAG.getVTList(WideResVT, WideOvVT);
+ SDNode *WideNode = DAG.getNode(
+ N->getOpcode(), DL, WideVTs, WideLHS, WideRHS).getNode();
+
+ // Replace the other vector result not being explicitly widened here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
+ SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo));
+ } else {
+ SDValue Zero = DAG.getConstant(
+ 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue OtherVal = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero);
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(WideNode, ResNo);
+}
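+
+// Illustrative note on WidenVecRes_OverflowOp with ResNo == 1 (e.g. a v3i32
+// UADDO whose v3i1 overflow result widens): the data operands are inserted
+// into wide undef vectors with INSERT_SUBVECTOR instead of GetWidenedVector,
+// since the data type may follow a different type action, and the original
+// v3i32 data result is then recovered with EXTRACT_SUBVECTOR.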
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ SDLoc DL(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ const SDNodeFlags Flags = N->getFlags();
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
+ }
+ if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
+      // If both the input and result vector types are of the same width,
+      // extend operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG,
+      // which allows the result to have fewer elements than the input.
+ if (Opcode == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
+ if (Opcode == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
+ if (Opcode == ISD::ZERO_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
+ }
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+ Ops[0] = InOp;
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ SDValue InVal = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+      // Extract the input and convert the shortened input vector.
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+  // Use the original element count so we don't do more scalar ops than
+  // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
+ }
+
+ return DAG.getBuildVector(WidenVT, DL, Ops);
+}
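+
+// Illustrative example for WidenVecRes_Convert (assuming SINT_TO_FP from
+// v3i32 to v3f32 with v4i32 and v4f32 legal): the input widens to v4i32
+// alongside the v4f32 result, the element counts match, and a single v4f32
+// SINT_TO_FP is emitted instead of unrolling into three scalar conversions.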
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
+ SDValue InOp = N->getOperand(1);
+ SDLoc DL(N);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other };
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+
+ // FIXME: Optimizations need to be implemented here.
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other };
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 32> OpChains;
+  // Use the original element count so we don't do more scalar ops than
+  // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ NewOps[1] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
+ OpChains.push_back(Ops[i].getValue(1));
+ }
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(WidenVT, DL, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue InOp = N->getOperand(0);
+ SDLoc DL(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenSVT = WidenVT.getVectorElementType();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InSVT = InVT.getVectorElementType();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
+ switch (Opcode) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ }
+ }
+ }
+
+ // Unroll, extend the scalars and rebuild the vector.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ switch (Opcode) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val);
+ break;
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val);
+ break;
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val);
+ break;
+ default:
+ llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected");
+ }
+ Ops.push_back(Val);
+ }
+
+ while (Ops.size() != WidenNumElts)
+ Ops.push_back(DAG.getUNDEF(WidenSVT));
+
+ return DAG.getBuildVector(WidenVT, DL, Ops);
+}
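+
+// Illustrative example for the unroll path above (assuming a
+// ZERO_EXTEND_VECTOR_INREG from v16i8 to a v2i64 result that widens to
+// v4i64): the loop extracts the low min(16, 4) = 4 bytes, zero-extends each
+// to i64, and the build vector already has all four lanes, so no undef
+// padding is needed.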
+
+SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
+  // If this is an FCOPYSIGN with the same input types, we can treat it as a
+  // normal binary op that can trap.
+ if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
+ return WidenVecRes_BinaryCanTrap(N);
+
+ // If the types are different, fall back to unrolling.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+
+ EVT ShVT = ShOp.getValueType();
+ if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
+ ShOp = GetWidenedVector(ShOp);
+ ShVT = ShOp.getValueType();
+ }
+ EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ ShVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ if (ShVT != ShWidenVT)
+ ShOp = ModifyToType(ShOp, ShWidenVT);
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+ // Unary op widening.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ cast<VTSDNode>(N->getOperand(1))->getVT()
+ .getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
+ SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetWidenedVector(WidenVec);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ InOp = GetPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeSplitVector:
+ break;
+ case TargetLowering::TypeWidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+      // The input widens to the same size. Convert to the widened value.
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ }
+
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+ // Determine new input vector type. The new input vector type will use
+    // the same element type (if it's a vector) or use the input type as a
+ // vector. It is the same size as the type to widen to.
+ EVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ }
+
+ if (TLI.isTypeLegal(NewInVT)) {
+ SDValue NewVec;
+ if (InVT.isVector()) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts, DAG.getUNDEF(InVT));
+ Ops[0] = InOp;
+
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
+ } else {
+ NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT);
+}
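+
+// Illustrative example for WidenVecRes_BITCAST (assuming a bitcast from i64
+// to v2i32, where v2i32 widens to v4i32 and v2i64 is legal): WidenSize is 128
+// and InSize is 64, so the scalar input becomes lane 0 of a v2i64 via
+// SCALAR_TO_VECTOR and is then bitcast to v4i32, avoiding the stack
+// store/load fallback.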
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+  // Build a vector with undef values for the new elements.
+ EVT VT = N->getValueType(0);
+
+ // Integer BUILD_VECTOR operands may be larger than the node's vector element
+ // type. The UNDEFs need to have the same type as the existing operands.
+ EVT EltVT = N->getOperand(0).getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
+ NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
+
+ return DAG.getBuildVector(WidenVT, dl, NewOps);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+      // Add undef vectors to widen to the correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+      // The inputs and the result are widened to the same type.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (!N->getOperand(i).isUndef())
+ break;
+
+ if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
+ SmallVector<int, 16> MaskOps(WidenNumElts, -1);
+ for (unsigned i = 0; i < NumInElts; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i + NumInElts] = i + WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ MaskOps);
+ }
+ }
+ }
+
+ // Fall back to use extracts and build vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getBuildVector(WidenVT, dl, Ops);
+}
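+
+// Illustrative sketch of the two-operand shuffle path above: for a
+// CONCAT_VECTORS of two 2-element inputs whose widened type equals WidenVT,
+// the mask becomes {0, 1, WidenNumElts, WidenNumElts + 1, -1, ...}, i.e. the
+// low lanes of each widened operand followed by undef padding.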
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ SDLoc dl(N);
+
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+
+ EVT InVT = InOp.getValueType();
+
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+
+  // We could try widening the input to the right length, but for now extract
+  // the original elements, fill the rest with undefs and build a vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i;
+ for (i=0; i < NumElts; ++i)
+ Ops[i] =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(IdxVal + i, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getBuildVector(WidenVT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SDValue Result;
+  SmallVector<SDValue, 16> LdChain; // Chain for the series of loads.
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
+ Mask, PassThru, N->getMemoryVT(),
+ N->getMemOperand(), ExtType,
+ N->isExpandingLoad());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
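+
+// Note on the mask widening above: passing true as the last ModifyToType
+// argument fills the extra lanes with zeros rather than undef, so the widened
+// masked load leaves the new lanes inactive and never touches memory beyond
+// the original element count.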
+
+SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
+
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
+ SDValue Scale = N->getScale();
+ unsigned NumElts = WideVT.getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ // Widen the Index operand
+ SDValue Index = N->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+ SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
+ Scale };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand(), N->getIndexType());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N),
+ WidenVT, N->getOperand(0));
+}
+
+// Return true if this is a node that could have two SETCCs as operands.
+static inline bool isLogicalMaskOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return true;
+ }
+ return false;
+}
+
+// This is used just for the assert in convertMask(). Check that this is
+// either a SETCC or a SETCC previously handled by convertMask().
+#ifndef NDEBUG
+static inline bool isSETCCorConvertedSETCC(SDValue N) {
+ if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
+ for (unsigned i = 1; i < N->getNumOperands(); ++i)
+ if (!N->getOperand(i)->isUndef())
+ return false;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::SIGN_EXTEND)
+ N = N.getOperand(0);
+
+ if (isLogicalMaskOp(N.getOpcode()))
+ return isSETCCorConvertedSETCC(N.getOperand(0)) &&
+ isSETCCorConvertedSETCC(N.getOperand(1));
+
+ return (N.getOpcode() == ISD::SETCC ||
+ ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
+}
+#endif
+
+// Return a mask of vector type MaskVT to replace InMask. Also adjust the
+// resulting mask to ToMaskVT if needed, using vector extension or truncation.
+SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
+ EVT ToMaskVT) {
+  // Currently a SETCC or an AND/OR/XOR with two SETCCs is handled.
+ // FIXME: This code seems to be too restrictive, we might consider
+ // generalizing it or dropping it.
+ assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
+
+ // Make a new Mask node, with a legal result VT.
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
+ Ops.push_back(InMask->getOperand(i));
+ SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
+
+ // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
+ // extend or truncate is needed.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
+ unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
+ if (MaskScalarBits < ToMaskScalBits) {
+ EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
+ } else if (MaskScalarBits > ToMaskScalBits) {
+ EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
+ }
+
+ assert(Mask->getValueType(0).getScalarSizeInBits() ==
+ ToMaskVT.getScalarSizeInBits() &&
+ "Mask should have the right element size by now.");
+
+ // Adjust Mask to the right number of elements.
+ unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
+ if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy);
+ Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
+ ZeroIdx);
+ } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
+ unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
+ EVT SubVT = Mask->getValueType(0);
+ SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getUNDEF(SubVT));
+ SubOps[0] = Mask;
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps);
+ }
+
+ assert((Mask->getValueType(0) == ToMaskVT) &&
+ "A mask of ToMaskVT should have been produced by now.");
+
+ return Mask;
+}
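+
+// Illustrative example for convertMask (assuming an AND of two SETCCs rebuilt
+// with MaskVT = v4i32 and ToMaskVT = v8i32): the scalar sizes already match,
+// so only the element count changes, by concatenating one undef v4i32
+// subvector; had MaskVT been v4i1, a SIGN_EXTEND to v4i32 would happen first.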
+
+// This method tries to handle VSELECT and its mask by legalizing operands
+// (which may require widening) and if needed adjusting the mask vector type
+// to match that of the VSELECT. Without it, many cases end up with
+// scalarization of the SETCC, with many unnecessary instructions.
+SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Cond = N->getOperand(0);
+
+ if (N->getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode()))
+ return SDValue();
+
+  // If this is a split VSELECT that was previously handled, do nothing.
+ EVT CondVT = Cond->getValueType(0);
+ if (CondVT.getScalarSizeInBits() != 1)
+ return SDValue();
+
+ EVT VSelVT = N->getValueType(0);
+ // Only handle vector types which are a power of 2.
+ if (!isPowerOf2_64(VSelVT.getSizeInBits()))
+ return SDValue();
+
+ // Don't touch if this will be scalarized.
+ EVT FinalVT = VSelVT;
+ while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+ FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx);
+
+ if (FinalVT.getVectorNumElements() == 1)
+ return SDValue();
+
+ // If there is support for an i1 vector mask, don't touch.
+ if (Cond.getOpcode() == ISD::SETCC) {
+ EVT SetCCOpVT = Cond->getOperand(0).getValueType();
+ while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
+ SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
+ EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
+ if (SetCCResVT.getScalarSizeInBits() == 1)
+ return SDValue();
+ } else if (CondVT.getScalarType() == MVT::i1) {
+ // If there is support for an i1 vector mask (or only scalar i1 conditions),
+ // don't touch.
+ while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
+ CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);
+
+ if (CondVT.getScalarType() == MVT::i1)
+ return SDValue();
+ }
+
+ // Get the VT and operands for VSELECT, and widen if needed.
+ SDValue VSelOp1 = N->getOperand(1);
+ SDValue VSelOp2 = N->getOperand(2);
+ if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) {
+ VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);
+ VSelOp1 = GetWidenedVector(VSelOp1);
+ VSelOp2 = GetWidenedVector(VSelOp2);
+ }
+
+ // The mask of the VSELECT should have integer elements.
+ EVT ToMaskVT = VSelVT;
+ if (!ToMaskVT.getScalarType().isInteger())
+ ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();
+
+ SDValue Mask;
+ if (Cond->getOpcode() == ISD::SETCC) {
+ EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType());
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else if (isLogicalMaskOp(Cond->getOpcode()) &&
+ Cond->getOperand(0).getOpcode() == ISD::SETCC &&
+ Cond->getOperand(1).getOpcode() == ISD::SETCC) {
+ // Cond is (AND/OR/XOR (SETCC, SETCC))
+ SDValue SETCC0 = Cond->getOperand(0);
+ SDValue SETCC1 = Cond->getOperand(1);
+ EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType());
+ EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType());
+ unsigned ScalarBits0 = VT0.getScalarSizeInBits();
+ unsigned ScalarBits1 = VT1.getScalarSizeInBits();
+ unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
+ EVT MaskVT;
+ // If the two SETCCs have different VTs, either extend/truncate one of
+ // them to the other "towards" ToMaskVT, or truncate one and extend the
+ // other to ToMaskVT.
+ if (ScalarBits0 != ScalarBits1) {
+ EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1);
+ EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0);
+ if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits())
+ MaskVT = WideVT;
+ else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits())
+ MaskVT = NarrowVT;
+ else
+ MaskVT = ToMaskVT;
+ } else
+ // If the two SETCCs have the same VT, don't change it.
+ MaskVT = VT0;
+
+ // Make new SETCCs and logical nodes.
+ SETCC0 = convertMask(SETCC0, VT0, MaskVT);
+ SETCC1 = convertMask(SETCC1, VT1, MaskVT);
+ Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1);
+
+ // Convert the logical op for VSELECT if needed.
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else
+ return SDValue();
+
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2);
+}
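+
+// Illustrative example for WidenVSELECTAndMask (assuming a VSELECT of v2f32
+// widened to v4f32, with a v2i1 SETCC condition and a legal v2i32 compare
+// result): the SETCC is re-emitted with a v2i32 result, concatenated with an
+// undef v2i32 subvector to form a v4i32 mask, and the VSELECT is rebuilt at
+// v4f32 with integer mask elements, avoiding scalarization of the SETCC.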
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ EVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ return Res;
+
+ EVT CondEltVT = CondVT.getVectorElementType();
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ CondEltVT, WidenNumElts);
+ if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+    // If we have to split the condition, there is no point in widening the
+    // select. This would result in a cycle of widening the select ->
+ // widening the condition operand -> splitting the condition operand ->
+ // splitting the select -> widening the select. Instead split this select
+ // further and widen the resulting type.
+ if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
+ SDValue Res = ModifyToType(SplitSelect, WidenVT);
+ return Res;
+ }
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
+}
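+
+// Illustrative example of the mask adjustment above: shuffling two v2i32
+// vectors widened to v4i32 with an original mask of {1, 2}: element 1 indexes
+// the first input and is kept, element 2 indexes the second input and is
+// rebased to 2 - 2 + 4 = 4, so the widened mask becomes {1, 4, -1, -1}.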
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operands must be vectors");
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ EVT InVT = InOp1.getValueType();
+ assert(InVT.isVector() && "can not widen non-vector type");
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), WidenNumElts);
+
+ // The input and output types often differ here, and it could be that while
+ // we'd prefer to widen the result type, the input operands have been split.
+ // In this case, we also need to split the result of this node as well.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
+ SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
+ return Res;
+ }
+
+ // If the inputs also widen, handle them directly. Otherwise widen by hand.
+ SDValue InOp2 = N->getOperand(1);
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp1 = GetWidenedVector(InOp1);
+ InOp2 = GetWidenedVector(InOp2);
+ } else {
+ InOp1 = DAG.WidenVector(InOp1, SDLoc(N));
+ InOp2 = DAG.WidenVector(InOp2, SDLoc(N));
+ }
+
+  // Assume that the input and output will be widened appropriately. If not,
+ // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ (void)WidenInVT;
+ return DAG.getNode(ISD::SETCC, SDLoc(N),
+ WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen this operator's operand!");
+
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
+ case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
+ case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
+ case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
+ case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecOp_EXTEND(N);
+ break;
+
+ case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ Res = WidenVecOp_Convert(N);
+ break;
+
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = WidenVecOp_VECREDUCE(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ if (N->isStrictFPOpcode())
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
+ "Invalid operand expansion");
+ else
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue InOp = N->getOperand(0);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
+ assert(VT.getVectorNumElements() <
+ InOp.getValueType().getVectorNumElements() &&
+ "Input wasn't widened!");
+
+ // We may need to further widen the operand until it has the same total
+ // vector size as the result.
+ EVT InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) {
+ EVT FixedVT = (MVT::SimpleValueType)i;
+ EVT FixedEltVT = FixedVT.getVectorElementType();
+ if (TLI.isTypeLegal(FixedVT) &&
+ FixedVT.getSizeInBits() == VT.getSizeInBits() &&
+ FixedEltVT == InEltVT) {
+ assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
+ "Not enough elements in the fixed type for the operand!");
+ assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
+ "We can't have the same type as we started with!");
+ if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
+ InOp = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ else
+ InOp = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ break;
+ }
+ }
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits())
+ // We couldn't find a legal vector type that was a widening of the input
+ // and could be extended in-register to the result type, so we have to
+ // scalarize.
+ return WidenVecOp_Convert(N);
+ }
+
+ // Use special DAG nodes to represent the operation of extending the
+ // low lanes.
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Extend legalization on extend operation!");
+ case ISD::ANY_EXTEND:
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, InOp);
+ case ISD::SIGN_EXTEND:
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, InOp);
+ case ISD::ZERO_EXTEND:
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, InOp);
+ }
+}
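+
+// Illustrative example for WidenVecOp_EXTEND (assuming a SIGN_EXTEND from
+// v2i32 to v2i64 whose v2i32 operand widened to v4i32): the widened input
+// already matches the 128-bit result size, so a single
+// SIGN_EXTEND_VECTOR_INREG of the v4i32 produces the v2i64 without
+// scalarizing.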
+
+SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and first input) is legal, but the second input is illegal.
+  // We can't do much to fix that, so just unroll and let the extracts from
+  // the second input be widened as needed later.
+ return DAG.UnrollVectorOp(N);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+  // The result is legal and the input is illegal; widen the input and convert.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
+ EVT InVT = InOp.getValueType();
+ unsigned Opcode = N->getOpcode();
+
+  // See if a widened result type would be legal; if so, widen the node.
+  // FIXME: This isn't safe for StrictFP. Other optimizations are needed here.
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ InVT.getVectorNumElements());
+ if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
+ SDValue Res;
+ if (N->isStrictFPOpcode()) {
+ Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+ { N->getOperand(0), InOp });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ } else
+ Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ EVT InEltVT = InVT.getVectorElementType();
+
+ // Unroll the convert into some scalar code and create a nasty build vector.
+ SmallVector<SDValue, 16> Ops(NumElts);
+ if (N->isStrictFPOpcode()) {
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ SmallVector<SDValue, 32> OpChains;
+ for (unsigned i=0; i < NumElts; ++i) {
+ NewOps[1] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
+ OpChains.push_back(Ops[i].getValue(1));
+ }
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else {
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(
+ Opcode, dl, EltVT,
+ DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ }
+
+ return DAG.getBuildVector(VT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InWidenVT = InOp.getValueType();
+ SDLoc dl(N);
+
+ // Check if we can convert between two legal vector types and extract.
+ unsigned InWidenSize = InWidenVT.getSizeInBits();
+ unsigned Size = VT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
+ unsigned NewNumElts = InWidenSize / Size;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ }
+
+ // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened
+ // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not,
+ // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids
+ // having to copy via memory.
+ if (VT.isVector()) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned EltSize = EltVT.getSizeInBits();
+ if (InWidenSize % EltSize == 0) {
+ unsigned NewNumElts = InWidenSize / EltSize;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ EVT InVT = N->getOperand(0).getValueType();
+ SDLoc dl(N);
+
+ // If the widen width for this operand is the same as the width of the concat
+  // and all but the first operand are undef, just use the widened operand.
+ unsigned NumOperands = N->getNumOperands();
+ if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ unsigned i;
+ for (i = 1; i < NumOperands; ++i)
+ if (!N->getOperand(i).isUndef())
+ break;
+
+ if (i == NumOperands)
+ return GetWidenedVector(N->getOperand(0));
+ }
+
+ // Otherwise, fall back to a nasty build vector.
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ return DAG.getBuildVector(VT, dl, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+  // We have to widen the value, but we only want to store the original
+ // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ if (!ST->getMemoryVT().getScalarType().isByteSized())
+ return TLI.scalarizeVectorStore(ST, DAG);
+
+ SmallVector<SDValue, 16> StChain;
+ if (ST->isTruncatingStore())
+ GenWidenVectorTruncStores(StChain, ST);
+ else
+ GenWidenVectorStores(StChain, ST);
+
+ if (StChain.size() == 1)
+ return StChain[0];
+ else
+ return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+}
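+
+// Illustrative example for WidenVecOp_STORE (assuming a v3i32 value widened
+// to v4i32, with legal i64 and i32 stores): GenWidenVectorStores may emit an
+// i64 store covering elements [0,1] followed by an i32 store of element 2,
+// and the two store chains are then merged with a TokenFactor.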
+
+SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
+ assert((OpNo == 1 || OpNo == 3) &&
+ "Can widen only data or mask operand of mstore");
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue StVal = MST->getValue();
+ SDLoc dl(N);
+
+ if (OpNo == 1) {
+ // Widen the value.
+ StVal = GetWidenedVector(StVal);
+
+ // The mask should be widened as well.
+ EVT WideVT = StVal.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+ } else {
+ // Widen the mask.
+ EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ EVT ValueVT = StVal.getValueType();
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ ValueVT.getVectorElementType(),
+ WideMaskVT.getVectorNumElements());
+ StVal = ModifyToType(StVal, WideVT);
+ }
+
+ assert(Mask.getValueType().getVectorNumElements() ==
+ StVal.getValueType().getVectorNumElements() &&
+ "Mask and data vectors should have the same number of elements");
+ return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
+ Mask, MST->getMemoryVT(), MST->getMemOperand(),
+ false, MST->isCompressingStore());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 4 && "Can widen only the index of mgather");
+ auto *MG = cast<MaskedGatherSDNode>(N);
+ SDValue DataOp = MG->getPassThru();
+ SDValue Mask = MG->getMask();
+ SDValue Scale = MG->getScale();
+
+ // Just widen the index. It's allowed to have extra elements.
+ SDValue Index = GetWidenedVector(MG->getIndex());
+
+ SDLoc dl(N);
+ SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
+ Scale};
+ SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
+ MG->getMemOperand(), MG->getIndexType());
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue DataOp = MSC->getValue();
+ SDValue Mask = MSC->getMask();
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+
+ if (OpNo == 1) {
+ DataOp = GetWidenedVector(DataOp);
+ unsigned NumElts = DataOp.getValueType().getVectorNumElements();
+
+ // Widen index.
+ EVT IndexVT = Index.getValueType();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ IndexVT.getVectorElementType(), NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+
+ // The mask should be widened as well.
+ EVT MaskVT = Mask.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(), NumElts);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+ } else if (OpNo == 4) {
+ // Just widen the index. It's allowed to have extra elements.
+ Index = GetWidenedVector(Index);
+ } else
+ llvm_unreachable("Can't widen this operand of mscatter");
+
+ SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
+ Scale};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+ MSC->getMemoryVT(), SDLoc(N), Ops,
+ MSC->getMemOperand(), MSC->getIndexType());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+ SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ // WARNING: In this code we widen the compare instruction with garbage.
+  // This garbage may contain denormal floats, which may be slow. Is this a
+  // real concern? Should we zero the unused lanes if this is a float compare?
+
+ // Get a new SETCC node to compare the newly widened operands.
+ // Only some of the compared elements are legal.
+ EVT SVT = getSetCCResultType(InOp0.getValueType());
+  // The result type is legal; if it is vXi1, keep vXi1 for the new SETCC.
+ if (VT.getScalarType() == MVT::i1)
+ SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ SVT.getVectorNumElements());
+
+ SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
+ SVT, InOp0, InOp1, N->getOperand(2));
+
+ // Extract the needed results from the result vector.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+ SVT.getVectorElementType(),
+ VT.getVectorNumElements());
+ SDValue CC = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, dl, VT, CC);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = GetWidenedVector(N->getOperand(0));
+ EVT OrigVT = N->getOperand(0).getValueType();
+ EVT WideVT = Op.getValueType();
+ EVT ElemVT = OrigVT.getVectorElementType();
+
+ SDValue NeutralElem;
+ switch (N->getOpcode()) {
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_UMAX:
+ NeutralElem = DAG.getConstant(0, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_MUL:
+ NeutralElem = DAG.getConstant(1, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_UMIN:
+ NeutralElem = DAG.getAllOnesConstant(dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_SMAX:
+ NeutralElem = DAG.getConstant(
+ APInt::getSignedMinValue(ElemVT.getSizeInBits()), dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_SMIN:
+ NeutralElem = DAG.getConstant(
+ APInt::getSignedMaxValue(ElemVT.getSizeInBits()), dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_FADD:
+ NeutralElem = DAG.getConstantFP(0.0, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_FMUL:
+ NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
+ break;
+  case ISD::VECREDUCE_FMAX:
+    // The neutral element for fmax is -infinity: max(x, -inf) == x.
+    NeutralElem = DAG.getConstantFP(
+        -std::numeric_limits<double>::infinity(), dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_FMIN:
+    // The neutral element for fmin is +infinity: min(x, +inf) == x.
+    NeutralElem = DAG.getConstantFP(
+        std::numeric_limits<double>::infinity(), dl, ElemVT);
+    break;
+ }
+
+ // Pad the vector with the neutral element.
+ unsigned OrigElts = OrigVT.getVectorNumElements();
+ unsigned WideElts = WideVT.getVectorNumElements();
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags());
+}
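+
+// Illustrative example for WidenVecOp_VECREDUCE: a VECREDUCE_ADD over v3i32
+// widened to v4i32 pads lane 3 with the neutral element 0, so the four-lane
+// reduction computes the same sum; VECREDUCE_UMIN instead pads with all-ones
+// and VECREDUCE_FMAX with -infinity, leaving each result unchanged.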
+
+SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
+  // This only gets called when the left and right inputs and the result have
+  // a legal odd vector type, and the condition is an illegal i1 vector of the
+  // same odd width that needs widening.
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && !VT.isPow2VectorType() && isTypeLegal(VT));
+
+ SDValue Cond = GetWidenedVector(N->getOperand(0));
+ SDValue LeftIn = DAG.WidenVector(N->getOperand(1), SDLoc(N));
+ SDValue RightIn = DAG.WidenVector(N->getOperand(2), SDLoc(N));
+ SDLoc DL(N);
+
+ SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond,
+ LeftIn, RightIn);
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, Select,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widened vector for
+// load/store.
+//  TLI:       Target lowering used to determine legal types.
+//  Width:     Width left to load/store.
+//  WidenVT:   The widened vector type to load to/store from.
+//  Align:     If 0, don't allow use of a wider type.
+//  WidenEx:   If Align is not 0, the additional amount we can load/store from.
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT;
+ if (Width == WidenEltWidth)
+ return RetVT;
+
+  // See if there is a legal integer type larger than the element type to
+  // load/store.
+ unsigned VT;
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+ if ((Action == TargetLowering::TypeLegal ||
+ Action == TargetLowering::TypePromoteInteger) &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (MemVTWidth == WidenWidth)
+ return MemVT;
+ RetVT = MemVT;
+ break;
+ }
+ }
+
+  // See if there is a larger vector type to load/store that has the same
+  // vector element type and whose width evenly divides the WidenVT width.
+ for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+ EVT MemVT = (MVT::SimpleValueType) VT;
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+ if ((Action == TargetLowering::TypeLegal ||
+ Action == TargetLowering::TypePromoteInteger) &&
+ WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ return RetVT;
+}
+
+// Builds a vector from scalar loads.
+// VecTy: Resulting vector type.
+// LdOps: Load operations used to build the vector.
+// [Start,End): Half-open range of loads in LdOps to use.
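+// For example (hypothetical operands): with LdOps = {i32, i16, i16} and
+// VecTy = v2i32, the i32 load seeds a SCALAR_TO_VECTOR of v2i32; when the
+// load type drops to i16, the partial vector is bitcast to v4i16 and the
+// insert index is rescaled from 1 to 2 before the i16 elements are inserted.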
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVectorImpl<SDValue> &LdOps,
+ unsigned Start, unsigned End) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc dl(LdOps[Start]);
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
+      // Readjust the insert position based on the new load type.
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD) {
+  // The strategy assumes that we can efficiently load power-of-two widths.
+  // The routine chops the vector into the largest vector loads with the same
+  // element type, or scalar loads, and then recombines the pieces into the
+  // widened vector type.
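+  // For example, on a hypothetical 32-bit target where v2i32 is legal but
+  // i64 is not, a 4-byte-aligned v3i32 load widened to v4i32 is emitted as
+  // a v2i32 load followed by an i32 load, and the pieces are recombined
+  // (with undef padding) into the v4i32 result.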
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ EVT LdVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ int LdWidth = LdVT.getSizeInBits();
+ int WidthDiff = WidenWidth - LdWidth;
+  // Only allow a wider load type for simple (non-volatile, non-atomic) loads.
+  unsigned LdAlign = (!LD->isSimple()) ? 0 : Align;
+
+  // Find the largest memory type we can use for the load.
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ int NewVTWidth = NewVT.getSizeInBits();
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ Align, MMOFlags, AAInfo);
+ LdChain.push_back(LdOp.getValue(1));
+
+  // Check if we can load the whole vector with a single load.
+ if (LdWidth <= NewVTWidth) {
+ if (!NewVT.isVector()) {
+ unsigned NumElts = WidenWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+ }
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
+ }
+
+  // Load the vector using multiple loads, from the largest vector type down
+  // to scalars.
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ unsigned Offset = 0;
+
+ while (LdWidth > 0) {
+ unsigned Increment = NewVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
+
+ SDValue L;
+ if (LdWidth < NewVTWidth) {
+ // The current type we are using is too large. Find a better size.
+ NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVTWidth = NewVT.getSizeInBits();
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Increment), MMOFlags, AAInfo);
+ LdChain.push_back(L.getValue(1));
+ if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
+ // Later code assumes the vector loads produced will be mergeable, so we
+ // must pad the final entry up to the previous width. Scalars are
+ // combined separately.
+ SmallVector<SDValue, 16> Loads;
+ Loads.push_back(L);
+ unsigned size = L->getValueSizeInBits(0);
+ while (size < LdOp->getValueSizeInBits(0)) {
+ Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+ size += L->getValueSizeInBits(0);
+ }
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads);
+ }
+ } else {
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Increment), MMOFlags, AAInfo);
+ LdChain.push_back(L.getValue(1));
+ }
+
+ LdOps.push_back(L);
+ LdOp = L;
+
+ LdWidth -= NewVTWidth;
+ }
+
+ // Build the vector from the load operations.
+ unsigned End = LdOps.size();
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+  // If the load contains vectors, build the vector using CONCAT_VECTORS.
+  // All of the vectors used for the loads are power-of-2 wide, and the
+  // scalar loads can be combined to make a power-of-2 wide vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+  // First, combine the scalar loads into a vector.
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector.
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ makeArrayRef(&ConcatOps[Idx], End - Idx));
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
+
+ if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ makeArrayRef(&ConcatOps[Idx], End - Idx));
+
+ // We need to fill the rest with undefs to build the vector.
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD,
+ ISD::LoadExtType ExtType) {
+ // For extension loads, it may not be more efficient to chop up the vector
+ // and then extend it. Instead, we unroll the load and build a new vector.
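+  // For example (hypothetical): an extending load of v3i8 producing a
+  // widened v4i32 becomes three scalar i8 extloads at byte offsets 0, 1,
+  // and 2, plus one undef element, assembled with a BUILD_VECTOR.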
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen.
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] =
+ DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
+ LdEltVT, Align, MMOFlags, AAInfo);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ Align, MMOFlags, AAInfo);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs.
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getBuildVector(WidenVT, dl, Ops);
+}
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store power-of-two widths.
+ // The routine chops the vector into the largest vector stores with the same
+ // element type or scalar stores.
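+  // For example, on a hypothetical 32-bit target where v2i32 is legal but
+  // i64 is not, storing v3i32 from a widened v4i32 value emits one v2i32
+  // store at offset 0 followed by one i32 store at offset 8, covering
+  // exactly the 96 bits of the original store.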
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ SDLoc dl(ST);
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ unsigned ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+
+ int Idx = 0; // current index to store
+ unsigned Offset = 0; // offset from base to store
+ while (StWidth != 0) {
+ // Find the largest vector type we can store with.
+ EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+ unsigned NewVTWidth = NewVT.getSizeInBits();
+ unsigned Increment = NewVTWidth / 8;
+ if (NewVT.isVector()) {
+ unsigned NumVTElts = NewVT.getVectorNumElements();
+ do {
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ StChain.push_back(DAG.getStore(
+ Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Offset), MMOFlags, AAInfo));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ Idx += NumVTElts;
+
+ BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } else {
+ // Cast the vector to the scalar type we can store.
+ unsigned NumElts = ValWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
+ // Readjust index position based on new vector type.
+ Idx = Idx * ValEltWidth / NewVTWidth;
+ do {
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getConstant(Idx++, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ StChain.push_back(DAG.getStore(
+ Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Offset), MMOFlags, AAInfo));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ // Restore index back to be relative to the original widen element type.
+ Idx = Idx * NewVTWidth / ValEltWidth;
+ }
+ }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
+ StoreSDNode *ST) {
+  // For truncating stores, it may not be more efficient to chop up the
+  // vector and store the pieces. Instead, we extract each element and then
+  // store it.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ SDLoc dl(ST);
+
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ValOp.getValueType();
+
+ // It must be true that the wide vector type is bigger than where we need to
+ // store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+  // For truncating stores, we cannot play the trick of chopping into legal
+  // vector types and bitcasting to the right type. Instead, we unroll the
+  // store.
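+  // For example (hypothetical): a truncating store of v4i32 to v4i8 becomes
+  // four EXTRACT_VECTOR_ELT + i8 truncating stores at byte offsets 0..3.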
+ EVT StEltVT = StVT.getVectorElementType();
+ EVT ValEltVT = ValVT.getVectorElementType();
+  // Elements are packed in memory at StEltVT granularity, so step the store
+  // offset by the memory element size, not the in-register element size.
+  unsigned Increment = StEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo(), StEltVT, Align,
+ MMOFlags, AAInfo));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
+    // Extract element i (not element 0) so every lane gets stored.
+    SDValue EOp = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+        DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ StChain.push_back(DAG.getTruncStore(
+ Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo));
+ }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+/// FillWithZeroes specifies that the vector should be widened with zeroes.
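+/// For example (hypothetical types): v2f32 -> v4f32 with FillWithZeroes
+/// becomes CONCAT_VECTORS(InOp, zero-v2f32); v3f32 -> v2f32 takes the
+/// EXTRACT_SUBVECTOR path below; v4f32 -> v2f32 falls back to per-element
+/// extraction and a BUILD_VECTOR.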
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
+ bool FillWithZeroes) {
+  // Note that InOp might have been widened, so it might already have
+  // the right width or it might need to be narrowed.
+ EVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ SDLoc dl(InOp);
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
+ if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+ unsigned NumConcat = WidenNumElts / InNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
+ DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = FillVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
+ }
+
+ if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ // Fall back to extract and build.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
+ DAG.getUNDEF(EltVT);
+ for ( ; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = FillVal;
+ return DAG.getBuildVector(NVT, dl, Ops);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 0000000000000..34660e3a48ec5
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,625 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// The DFA is queried as a state machine to model "packets/bundles" during
+// scheduling. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only the order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "scheduler"
+
+static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable use of DFA during scheduling"));
+
+static cl::opt<int> RegPressureThreshold(
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
+
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
+ : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TLI = IS->TLI;
+ TII = STI.getInstrInfo();
+ ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
+ // This hard requirement could be relaxed, but for now
+ // do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
+
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
+}
+
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+
+ SUnit *PredSU = Pred.getSUnit();
+ const SDNode *ScegN = PredSU->getNode();
+
+ if (!ScegN)
+ continue;
+
+    // If the value is produced by CopyFromReg, it is probably
+    // live into the BB from outside.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: NumberDeps++; break;
+ case ISD::CopyToReg: break;
+ case ISD::INLINEASM: break;
+ case ISD::INLINEASM_BR: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ MVT VT = ScegN->getSimpleValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+ unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ continue;
+
+ SUnit *SuccSU = Succ.getSUnit();
+ const SDNode *ScegN = SuccSU->getNode();
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: break;
+ case ISD::CopyToReg: NumberDeps++; break;
+ case ISD::INLINEASM: break;
+ case ISD::INLINEASM_BR: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+static unsigned numberCtrlDepsInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (const SDep &Succ : SU->Succs)
+ if (Succ.isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+static unsigned numberCtrlPredInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SDep &Pred : SU->Preds)
+ if (Pred.isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+///
+/// Initialize nodes.
+///
+void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ initNumRegDefsLeft(SU);
+ SU->NodeQueueId = 0;
+ }
+}
+
+/// This heuristic is used if DFA scheduling is not desired
+/// for some VLIW platform.
+bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = nullptr;
+ for (const SDep &Pred : SU->Preds) {
+ SUnit &PredSU = *Pred.getSUnit();
+ if (!PredSU.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
+ return nullptr;
+ OnlyAvailablePred = &PredSU;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+  // Look at all of the successors of this node. Count the number of nodes
+  // for which this node is the sole unscheduled predecessor.
+ unsigned NumNodesBlocking = 0;
+ for (const SDep &Succ : SU->Succs)
+ if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
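+/// For example (hypothetical VLIW target): if the packet already holds an
+/// instruction that SU depends on, SU cannot issue in the same cycle, so
+/// this returns false even when a functional unit is still free.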
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+  // Now make sure there are no dependencies
+  // on instructions already in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ for (const SDep &Succ : Packet[i]->Succs) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order deps.
+ if (Succ.isCtrl())
+ continue;
+
+ if (Succ.getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
+
+int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ int RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ MVT VT = SU->getNode()->getSimpleValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates the change in reg pressure from this SU.
+/// This is done by trivially tracking the vregs defined
+/// and used by dependent instructions.
+/// The RawPressure flag makes this function ignore
+/// existing reg file sizes and report the raw def/use
+/// balance.
+int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ int RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ else {
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 50;
+static const unsigned PriorityThree = 15;
+static const unsigned PriorityFour = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
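+// Worked example (hypothetical SU, default greedy heuristic, ignoring the
+// opcode-specific bonuses below): with height 3, 2 solely-blocked nodes,
+// resources available, and a reg pressure delta of 1, the cost is
+// 1 + 3*ScaleTwo + 2*ScaleTwo = 51, shifted left by FactorOne (x4) to 204,
+// minus 1*ScaleTwo for a final cost of 194.
+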
+/// Returns single number reflecting benefit of scheduling SU
+/// in the current cycle.
+int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ int ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+  // Adaptable scheduling: a small but very parallel
+  // region, where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+  // Default heuristic: greedy and
+  // critical-path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+    // Now see how many instructions are blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+  // These are platform-specific heuristics.
+  // They will need to move into the back end
+  // and be accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFour;
+ break;
+
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
+ ResCount += PriorityThree;
+ break;
+ }
+ }
+ return ResCount;
+}
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+  // Use a null entry as an event marker to reset
+  // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ MVT VT = ScegN->getSimpleValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --Pred.getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+  // The heuristic is simple: a node with no data successors reduces
+  // the number of live ranges; all others increase it.
+ unsigned NumberNonControlDeps = 0;
+
+ for (const SDep &Succ : SU->Succs) {
+ adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
+ if (!Succ.isCtrl())
+ NumberNonControlDeps++;
+ }
+
+ if (!NumberNonControlDeps) {
+ if (ParallelLiveRanges >= SU->NumPreds)
+ ParallelLiveRanges -= SU->NumPreds;
+ else
+ ParallelLiveRanges = 0;
+
+ }
+ else
+ ParallelLiveRanges += SU->NumRegDefsLeft;
+
+ // Track parallel live chains.
+ HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+ HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+ unsigned NodeNumDefs = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ // No register need be allocated for this.
+ if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ NodeNumDefs = 0;
+ break;
+ }
+ NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
+ }
+ else
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::CopyFromReg:
+ NodeNumDefs++;
+ break;
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
+ NodeNumDefs++;
+ break;
+ }
+
+ SU->NumRegDefsLeft = NodeNumDefs;
+}
+
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
+ return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+
+/// Main access point - returns the next instruction
+/// to be placed in the scheduling sequence.
+SUnit *ResourcePriorityQueue::pop() {
+ if (empty())
+ return nullptr;
+
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ if (!DisableDFASched) {
+    int BestCost = SUSchedulingCost(*Best);
+    for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
+      // Compute the cost once per candidate instead of twice.
+      int Cost = SUSchedulingCost(*I);
+      if (Cost > BestCost) {
+        BestCost = Cost;
+        Best = I;
+      }
+    }
+ }
+ // Use default TD scheduling mechanism.
+ else {
+ for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ }
+
+ SUnit *V = *Best;
+ if (Best != std::prev(Queue.end()))
+ std::swap(*Best, Queue.back());
+
+ Queue.pop_back();
+
+ return V;
+}
+
+
+void ResourcePriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = find(Queue, SU);
+ if (I != std::prev(Queue.end()))
+ std::swap(*I, Queue.back());
+
+ Queue.pop_back();
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
new file mode 100644
index 0000000000000..65b9d017fc5cd
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -0,0 +1,166 @@
+//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
+
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/DataTypes.h"
+#include <utility>
+
+namespace llvm {
+
+class DIVariable;
+class DIExpression;
+class SDNode;
+class Value;
+class raw_ostream;
+
+/// Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+class SDDbgValue {
+public:
+ enum DbgValueKind {
+ SDNODE = 0, ///< Value is the result of an expression.
+ CONST = 1, ///< Value is a constant.
+ FRAMEIX = 2, ///< Value is contents of a stack location.
+ VREG = 3 ///< Value is a virtual register.
+ };
+private:
+ union {
+ struct {
+ SDNode *Node; ///< Valid for expressions.
+ unsigned ResNo; ///< Valid for expressions.
+ } s;
+ const Value *Const; ///< Valid for constants.
+ unsigned FrameIx; ///< Valid for stack objects.
+ unsigned VReg; ///< Valid for registers.
+ } u;
+ DIVariable *Var;
+ DIExpression *Expr;
+ DebugLoc DL;
+ unsigned Order;
+ enum DbgValueKind kind;
+ bool IsIndirect;
+ bool Invalid = false;
+ bool Emitted = false;
+
+public:
+ /// Constructor for non-constants.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R,
+ bool indir, DebugLoc dl, unsigned O)
+ : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(indir) {
+ kind = SDNODE;
+ u.s.Node = N;
+ u.s.ResNo = R;
+ }
+
+ /// Constructor for constants.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, const Value *C, DebugLoc dl,
+ unsigned O)
+ : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) {
+ kind = CONST;
+ u.Const = C;
+ }
+
+ /// Constructor for virtual registers and frame indices.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned VRegOrFrameIdx,
+ bool IsIndirect, DebugLoc DL, unsigned Order,
+ enum DbgValueKind Kind)
+ : Var(Var), Expr(Expr), DL(DL), Order(Order), IsIndirect(IsIndirect) {
+ assert((Kind == VREG || Kind == FRAMEIX) &&
+ "Invalid SDDbgValue constructor");
+ kind = Kind;
+ if (kind == VREG)
+ u.VReg = VRegOrFrameIdx;
+ else
+ u.FrameIx = VRegOrFrameIdx;
+ }
+
+ /// Returns the kind.
+ DbgValueKind getKind() const { return kind; }
+
+ /// Returns the DIVariable pointer for the variable.
+ DIVariable *getVariable() const { return Var; }
+
+ /// Returns the DIExpression pointer for the expression.
+ DIExpression *getExpression() const { return Expr; }
+
+ /// Returns the SDNode* for a register ref
+ SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; }
+
+ /// Returns the ResNo for a register ref
+ unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; }
+
+ /// Returns the Value* for a constant
+ const Value *getConst() const { assert (kind==CONST); return u.Const; }
+
+ /// Returns the FrameIx for a stack object
+ unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
+
+ /// Returns the Virtual Register for a VReg
+ unsigned getVReg() const { assert (kind==VREG); return u.VReg; }
+
+ /// Returns whether this is an indirect value.
+ bool isIndirect() const { return IsIndirect; }
+
+ /// Returns the DebugLoc.
+ DebugLoc getDebugLoc() const { return DL; }
+
+ /// Returns the SDNodeOrder. This is the order of the preceding node in the
+ /// input.
+ unsigned getOrder() const { return Order; }
+
+ /// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+ /// property. A SDDbgValue is invalid if the SDNode that produces the value is
+ /// deleted.
+ void setIsInvalidated() { Invalid = true; }
+ bool isInvalidated() const { return Invalid; }
+
+  /// setIsEmitted / isEmitted - Setter / getter for the flag indicating that
+  /// this SDDbgValue has been emitted to an MBB.
+ void setIsEmitted() { Emitted = true; }
+ bool isEmitted() const { return Emitted; }
+
+ /// clearIsEmitted - Reset Emitted flag, for certain special cases where
+ /// dbg.addr is emitted twice.
+ void clearIsEmitted() { Emitted = false; }
+
+ LLVM_DUMP_METHOD void dump() const;
+ LLVM_DUMP_METHOD void print(raw_ostream &OS) const;
+};
+
+/// Holds the information from a dbg_label node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+class SDDbgLabel {
+ MDNode *Label;
+ DebugLoc DL;
+ unsigned Order;
+
+public:
+ SDDbgLabel(MDNode *Label, DebugLoc dl, unsigned O)
+ : Label(Label), DL(std::move(dl)), Order(O) {}
+
+ /// Returns the MDNode pointer for the label.
+ MDNode *getLabel() const { return Label; }
+
+ /// Returns the DebugLoc.
+ DebugLoc getDebugLoc() const { return DL; }
+
+ /// Returns the SDNodeOrder. This is the order of the preceding node in the
+ /// input.
+ unsigned getOrder() const { return Order; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 0000000000000..7ee44c808fcbf
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,804 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstrEmitter.h"
+#include "ScheduleDAGSDNodes.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+static RegisterScheduler
+ linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+ createDAGLinearizer);
+
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
+ SUnit *pop() {
+ if (empty()) return nullptr;
+ SUnit *V = Queue.back();
+ Queue.pop_back();
+ return V;
+ }
+ };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other nodes
+  /// that modify the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule() override;
+
+  /// AddPred - adds a predecessor edge to SUnit SU.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+  /// RemovePred - removes a predecessor edge from SUnit SU.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVectorImpl<SUnit*>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
+ void ListScheduleBottomUp();
+
+ /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool forceUnitLatencies() const override { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), nullptr);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(nullptr);
+
+ LLVM_DEBUG(dump());
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(*PredSU);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue.push(PredSU);
+ }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SDep &Pred : SU->Preds) {
+ ReleasePred(SU, &Pred);
+ if (Pred.isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[Pred.getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
+ LiveRegCycles[Pred.getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(dumpNode(*SU));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isAssignedRegDep()) {
+ if (LiveRegCycles[Succ.getReg()] == Succ.getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[Succ.getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[Succ.getReg()] = nullptr;
+ LiveRegCycles[Succ.getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getGluedNode())
+ return nullptr;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return nullptr;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue)
+ return nullptr;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (const SDValue &Op : N->op_values()) {
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return nullptr;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ SUnit *NewSU = newSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but has different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = newSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ }
+
+ SDep ChainPred;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ ChainPred = Pred;
+ else if (Pred.getSUnit()->getNode() &&
+ Pred.getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(Pred);
+ else
+ NodePreds.push_back(Pred);
+ }
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ ChainSuccs.push_back(Succ);
+ else
+ NodeSuccs.push_back(Succ);
+ }
+
+ if (ChainPred.getSUnit()) {
+ RemovePred(SU, ChainPred);
+ if (isNewLoad)
+ AddPred(LoadSU, ChainPred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad) {
+ AddPred(LoadSU, Pred);
+ }
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+ if (isNewLoad) {
+ SDep D(LoadSU, SDep::Barrier);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+ }
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ NewSU = Clone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SDep &Pred : SU->Preds)
+ if (!Pred.isArtificial())
+ AddPred(NewSU, Pred);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = Succ;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVectorImpl<SUnit*> &Copies) {
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr));
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr));
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = Succ;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, Succ));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+ }
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
+
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ unsigned NumRes;
+ if (N->getOpcode() == ISD::CopyFromReg) {
+ // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
+ NumRes = 1;
+ } else {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ NumRes = MCID.getNumDefs();
+ for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ }
+ return N->getSimpleValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVectorImpl<unsigned> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
+ if (RegAdded.insert(*AI).second) {
+ LRegs.push_back(*AI);
+ Added = true;
+ }
+ }
+ }
+ return Added;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specific node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
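+/// For inline asm nodes, the operand list is walked flag word by flag word:
+/// each group starts with a flag operand encoding its kind and register
+/// count, followed by that many register operands (see the loop below).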
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVectorImpl<unsigned> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isAssignedRegDep()) {
+ CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM ||
+ Node->getOpcode() == ISD::INLINEASM_BR) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (Register::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+ if (!Node->isMachineOpcode())
+ continue;
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue.push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue.pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue.pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try code duplication or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ if (!CurSU) {
+        // Try duplicating the nodes that produce these
+ // "expensive to copy" values to break the dependency. In case even
+ // that doesn't work, insert cross class copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross copy register class is the same as RC, then it must be
+        // possible to copy the value directly. Do not try to duplicate the def.
+        // If the cross copy register class is not the same as RC, then it's
+        // possible to copy the value but it requires cross register class
+        // copies and it is expensive.
+        // If the cross copy register class is null, then it's not possible to
+        // copy the value at all.
+ SUnit *NewDef = nullptr;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical "
+ "register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          LLVM_DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
+                     << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+ NewDef = Copies.back();
+ }
+
+        LLVM_DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
+                   << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ if (!CurSU) {
+ llvm_unreachable("Unable to resolve live physical register dependencies!");
+ }
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue.push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+ // Reverse the order since it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+
+namespace {
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - A no-op scheduler: it simply linearizes the DAG in
+// topological order.
+// IMPORTANT: this may not work for targets with physreg dependencies.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+ ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule() override;
+
+ MachineBasicBlock *
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos) override;
+
+private:
+ std::vector<SDNode*> Sequence;
+ DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user
+
+ void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+ if (N->getNodeId() != 0)
+ llvm_unreachable(nullptr);
+
+ if (!N->isMachineOpcode() &&
+ (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+ // These nodes do not need to be translated into MIs.
+ return;
+
+ LLVM_DEBUG(dbgs() << "\n*** Scheduling: ");
+ LLVM_DEBUG(N->dump(DAG));
+ Sequence.push_back(N);
+
+ unsigned NumOps = N->getNumOperands();
+ if (unsigned NumLeft = NumOps) {
+ SDNode *GluedOpN = nullptr;
+ do {
+ const SDValue &Op = N->getOperand(NumLeft-1);
+ SDNode *OpN = Op.getNode();
+
+ if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+ // Schedule glue operand right above N.
+ GluedOpN = OpN;
+ assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+ OpN->setNodeId(0);
+ ScheduleNode(OpN);
+ continue;
+ }
+
+ if (OpN == GluedOpN)
+ // Glue operand is already scheduled.
+ continue;
+
+ DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+ if (DI != GluedMap.end() && DI->second != N)
+ // Users of glues are counted against the glued users.
+ OpN = DI->second;
+
+ unsigned Degree = OpN->getNodeId();
+ assert(Degree > 0 && "Predecessor over-released!");
+ OpN->setNodeId(--Degree);
+ if (Degree == 0)
+ ScheduleNode(OpN);
+ } while (--NumLeft);
+ }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) {
+ while (SDNode *Glued = N->getGluedUser())
+ N = Glued;
+ return N;
+}
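+
+// For example, in a glued sequence where node B is A's glued user and node C
+// is B's glued user, findGluedUser(A) returns C, the representative user of
+// the whole glued chain.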
+
+void ScheduleDAGLinearize::Schedule() {
+ LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n");
+
+ SmallVector<SDNode*, 8> Glues;
+ unsigned DAGSize = 0;
+ for (SDNode &Node : DAG->allnodes()) {
+ SDNode *N = &Node;
+
+ // Use node id to record degree.
+ unsigned Degree = N->use_size();
+ N->setNodeId(Degree);
+ unsigned NumVals = N->getNumValues();
+ if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
+ N->hasAnyUseOfValue(NumVals-1)) {
+ SDNode *User = findGluedUser(N);
+ if (User) {
+ Glues.push_back(N);
+ GluedMap.insert(std::make_pair(N, User));
+ }
+ }
+
+ if (N->isMachineOpcode() ||
+ (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
+ ++DAGSize;
+ }
+
+ for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
+ SDNode *Glue = Glues[i];
+ SDNode *GUser = GluedMap[Glue];
+ unsigned Degree = Glue->getNodeId();
+ unsigned UDegree = GUser->getNodeId();
+
+ // Glue user must be scheduled together with the glue operand. So other
+ // users of the glue operand must be treated as its users.
+ SDNode *ImmGUser = Glue->getGluedUser();
+ for (const SDNode *U : Glue->uses())
+ if (U == ImmGUser)
+ --Degree;
+ GUser->setNodeId(UDegree + Degree);
+ Glue->setNodeId(1);
+ }
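+
+  // Worked example: suppose glue node G has uses {U, X}, where U is G's
+  // immediate glued user. The loop above subtracts the G->U edge from G's
+  // degree and adds the remainder (here, the edge from X) onto U's degree,
+  // since X's release of G is redirected to U through GluedMap. G's own id is
+  // pinned to 1, marking it as not yet scheduled until U pulls it in directly.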
+
+ Sequence.reserve(DAGSize);
+ ScheduleNode(DAG->getRoot().getNode());
+}
+
+MachineBasicBlock*
+ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+
+ LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
+
+ unsigned NumNodes = Sequence.size();
+ MachineBasicBlock *BB = Emitter.getBlock();
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ SDNode *N = Sequence[NumNodes-i-1];
+ LLVM_DEBUG(N->dump(DAG));
+ Emitter.EmitNode(N, false, false, VRBaseMap);
+
+ // Emit any debug values associated with the node.
+ if (N->getHasDebugValue()) {
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ for (auto DV : DAG->GetDbgValues(N)) {
+ if (!DV->isEmitted())
+ if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
+ BB->insert(InsertPos, DbgMI);
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << '\n');
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGFast(*IS->MF);
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGLinearize(*IS->MF);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 0000000000000..ff806bdb822c2
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,3188 @@
+//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ "Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+
+static RegisterScheduler
+ sourceListDAGScheduler("source",
+ "Similar to list-burr but schedules in source "
+ "order when possible",
+ createSourceListDAGScheduler);
+
+static RegisterScheduler
+ hybridListDAGScheduler("list-hybrid",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance latency and register pressure",
+ createHybridListDAGScheduler);
+
+static RegisterScheduler
+ ILPListDAGScheduler("list-ilp",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance ILP and register pressure",
+ createILPListDAGScheduler);
+
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
+// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
+static cl::opt<bool> DisableSchedRegPressure(
+ "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+ cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+ "disable-sched-live-uses", cl::Hidden, cl::init(true),
+ cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+ "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+ cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+ "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+ cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+ "disable-sched-stalls", cl::Hidden, cl::init(true),
+ cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+ "disable-sched-critical-path", cl::Hidden, cl::init(false),
+ cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+ "disable-sched-height", cl::Hidden, cl::init(false),
+ cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+ "disable-2addr-hack", cl::Hidden, cl::init(true),
+ cl::desc("Disable scheduler's two-address hack"));
+
+static cl::opt<int> MaxReorderWindow(
+ "max-sched-reorder", cl::Hidden, cl::init(6),
+ cl::desc("Number of instructions to allow ahead of the critical path "
+ "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+ "sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle whan no target itinerary exists."));
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+ /// NeedLatency - True if the scheduler will make use of latency information.
+ bool NeedLatency;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit *> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle = 0;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle;
+
+ /// IssueCount - Count instructions issued in this cycle
+ /// Currently valid only for bottom-up scheduling.
+ unsigned IssueCount;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other nodes
+  /// that modify the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::unique_ptr<SUnit*[]> LiveRegDefs;
+ std::unique_ptr<SUnit*[]> LiveRegGens;
+
+ // Collect interferences between physical register use/defs.
+ // Each interference is an SUnit and set of physical registers.
+ SmallVector<SUnit*, 4> Interferences;
+
+ using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
+
+ LRegsMapT LRegsMap;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
+public:
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf),
+ NeedLatency(needlatency), AvailableQueue(availqueue),
+ Topo(SUnits, nullptr) {
+ const TargetSubtargetInfo &STI = mf.getSubtarget();
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
+ }
+
+ ~ScheduleDAGRRList() override {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule() override;
+
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
+ /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+ /// create a cycle.
+ bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ return Topo.WillCreateCycle(SU, TargetSU);
+ }
+
+  /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
+  /// Does *NOT* update the topological ordering! It just queues an update.
+ void AddPredQueued(SUnit *SU, const SDep &D) {
+ Topo.AddPredQueued(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+ void AddPred(SUnit *SU, const SDep &D) {
+ Topo.AddPred(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ Topo.RemovePred(SU, D.getSUnit());
+ SU->removePred(D);
+ }
+
+private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
+ void ReleasePred(SUnit *SU, const SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
+ void CapturePred(SDep *PredEdge);
+ void UnscheduleNodeBottomUp(SUnit*);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
+ SUnit *TryUnfoldSU(SUnit *);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVectorImpl<SUnit*>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
+
+ void releaseInterferences(unsigned Reg = 0);
+
+ SUnit *PickNodeToScheduleBottomUp();
+ void ListScheduleBottomUp();
+
+ /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+ SUnit *CreateNewSUnit(SDNode *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = newSUnit(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.MarkDirty();
+ return NewNode;
+ }
+
+ /// CreateClone - Creates a new SUnit from an existing one.
+ SUnit *CreateClone(SUnit *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = Clone(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.MarkDirty();
+ return NewNode;
+ }
+
+ /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// need actual latency information but the hybrid scheduler does.
+ bool forceUnitLatencies() const override {
+ return !NeedLatency;
+ }
+};
+
+} // end anonymous namespace
+
+/// GetCostForDef - Looks up the register class and cost for a given definition.
+/// Typically this just means looking up the representative register class,
+/// but for untyped values (MVT::Untyped) it means inspecting the node's
+/// opcode to determine what register class is being generated.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ unsigned &RegClass, unsigned &Cost,
+ const MachineFunction &MF) {
+ MVT VT = RegDefPos.GetValue();
+
+ // Special handling for untyped values. These values can only come from
+ // the expansion of custom DAG-to-DAG patterns.
+ if (VT == MVT::Untyped) {
+ const SDNode *Node = RegDefPos.GetNode();
+
+ // Special handling for CopyFromReg of untyped values.
+ if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Opcode = Node->getMachineOpcode();
+ if (Opcode == TargetOpcode::REG_SEQUENCE) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Idx = RegDefPos.GetIdx();
+ const MCInstrDesc Desc = TII->get(Opcode);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
+ RegClass = RC->getID();
+ // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+ // better way to determine it.
+ Cost = 1;
+ } else {
+ RegClass = TLI->getRepRegClassFor(VT)->getID();
+ Cost = TLI->getRepRegClassCostFor(VT);
+ }
+}
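+
+// The typed-value path above is a plain table lookup; e.g. for MVT::i32 the
+// representative register class would typically be a 32-bit GPR class (GR32
+// on x86, as a hypothetical illustration), with the cost coming from
+// TLI->getRepRegClassCostFor(MVT::i32).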
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
+
+ CurCycle = 0;
+ IssueCount = 0;
+ MinAvailableCycle =
+ DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
+ NumLiveRegs = 0;
+ // Allocate slots for each physical register, plus one for a special register
+ // to track the virtual resource of a calling sequence.
+ LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
+ LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
+ CallSeqEndForStart.clear();
+ assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(nullptr);
+
+ LLVM_DEBUG(dump());
+ Topo.MarkDirty();
+
+ AvailableQueue->initNodes(SUnits);
+
+ HazardRec->Reset();
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+
+ AvailableQueue->releaseState();
+
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(*PredSU);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ if (!forceUnitLatencies()) {
+ // Updating predecessor's height. This is now the cycle when the
+ // predecessor can be scheduled without causing a pipeline stall.
+ PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+ }
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+ if (isReady(PredSU)) {
+ AvailableQueue->push(PredSU);
+ }
+    // CapturePred and others may have left the node in the pending queue;
+    // avoid adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
+ }
+}
+
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+ unsigned NestLevel,
+ const TargetInstrInfo *TII) {
+ SDNode *N = Outer;
+ while (true) {
+ if (N == Inner)
+ return true;
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (const SDValue &Op : N->op_values())
+ if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
+ return true;
+ return false;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+ if (NestLevel == 0)
+ return false;
+ --NestLevel;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (const SDValue &Op : N->op_values())
+ if (Op.getValueType() == MVT::Other) {
+ N = Op.getNode();
+ goto found_chain_operand;
+ }
+ return false;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return false;
+ }
+}
+
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+/// NestLevel and MaxNest are used in recursion to indicate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+ const TargetInstrInfo *TII) {
+ while (true) {
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ SDNode *Best = nullptr;
+ unsigned BestMaxNest = MaxNest;
+ for (const SDValue &Op : N->op_values()) {
+ unsigned MyNestLevel = NestLevel;
+ unsigned MyMaxNest = MaxNest;
+ if (SDNode *New = FindCallSeqStart(Op.getNode(),
+ MyNestLevel, MyMaxNest, TII))
+ if (!Best || (MyMaxNest > BestMaxNest)) {
+ Best = New;
+ BestMaxNest = MyMaxNest;
+ }
+ }
+ assert(Best);
+ MaxNest = BestMaxNest;
+ return Best;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ MaxNest = std::max(MaxNest, NestLevel);
+ } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+ assert(NestLevel != 0);
+ --NestLevel;
+ if (NestLevel == 0)
+ return N;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (const SDValue &Op : N->op_values())
+ if (Op.getValueType() == MVT::Other) {
+ N = Op.getNode();
+ goto found_chain_operand;
+ }
+ return nullptr;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return nullptr;
+ }
+}
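+
+// Illustration of the nesting bookkeeping in FindCallSeqStart above, walking
+// the chain bottom-up from an outer CALLSEQ_END:
+//
+//   CALLSEQ_BEGIN (outer)   <- NestLevel drops to 0; returned
+//     CALLSEQ_BEGIN (inner) <- NestLevel 2 -> 1
+//     CALLSEQ_END   (inner) <- NestLevel 1 -> 2, MaxNest = 2
+//   CALLSEQ_END   (outer)   <- starting point, NestLevel 0 -> 1
+//
+// Each lowered CALLSEQ_END increments NestLevel and each CALLSEQ_BEGIN
+// decrements it, so the walk skips over complete inner call sequences.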
+
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
+ // Bottom up: release predecessors
+ for (SDep &Pred : SU->Preds) {
+ ReleasePred(SU, &Pred);
+ if (Pred.isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
+ if (!LiveRegGens[Pred.getReg()]) {
+ ++NumLiveRegs;
+ LiveRegGens[Pred.getReg()] = SU;
+ }
+ }
+ }
+
+ // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+ // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+ // these nodes, to prevent other calls from being interscheduled with them.
+ unsigned CallResource = TRI->getNumRegs();
+ if (!LiveRegDefs[CallResource])
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+ unsigned NestLevel = 0;
+ unsigned MaxNest = 0;
+ SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+ assert(N && "Must find call sequence start");
+
+ SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = Def;
+ LiveRegGens[CallResource] = SU;
+ break;
+ }
+}
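+
+// The calling-sequence resource used above acts like one extra physical
+// register: slot TRI->getNumRegs() in LiveRegDefs/LiveRegGens is "defined" by
+// the CALLSEQ_BEGIN SUnit and "used" by the matching CALLSEQ_END SUnit, so
+// the ordinary live-register interference machinery keeps unrelated calls
+// from being interleaved with the sequence.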
+
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = std::numeric_limits<unsigned>::max();
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle = PendingQueue[i]->getHeight();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
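+    // Erase by swapping with the last element, then revisit the swapped-in
+    // entry at the same index.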
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ IssueCount = 0;
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ HazardRec->RecedeCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ // FIXME: Nodes such as CopyFromReg probably should not advance the current
+ // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+ // has predecessors the cycle will be advanced when they are scheduled.
+  // But given the crude nature of modeling latency through such nodes, we
+ // currently need to treat these nodes like real instructions.
+ // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
+ unsigned ReadyCycle = SU->getHeight();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, -Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
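+
+// The offset passed to getHazardType() above asks the recognizer about
+// issuing SU a number of cycles away from the current cycle (negated here
+// because scheduling proceeds bottom-up); the loop counts how many cycles
+// must be skipped before SU can issue without a resource conflict, then
+// jumps there with a single AdvanceToCycle() call.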
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+}
+
+static void resetVRegCycle(SUnit *SU);
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
+ LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(dumpNode(*SU));
+
+#ifndef NDEBUG
+ if (CurCycle < SU->getHeight())
+ LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
+#endif
+
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+ // node its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
+ SU->setHeightToAtLeast(CurCycle);
+
+ // Reserve resources for the scheduled instruction.
+ EmitNode(SU);
+
+ Sequence.push_back(SU);
+
+ AvailableQueue->scheduledNode(SU);
+
+ // If HazardRec is disabled, and each inst counts as one cycle, then
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+ // PendingQueue for schedulers that implement HasReadyFilter.
+ if (!HazardRec->isEnabled() && AvgIPC < 2)
+ AdvanceToCycle(CurCycle + 1);
+
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
+
+ // Release all the implicit physical register defs that are live.
+ for (SDep &Succ : SU->Succs) {
+ // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
+ if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[Succ.getReg()] = nullptr;
+ LiveRegGens[Succ.getReg()] = nullptr;
+ releaseInterferences(Succ.getReg());
+ }
+ }
+ // Release the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = nullptr;
+ LiveRegGens[CallResource] = nullptr;
+ releaseInterferences(CallResource);
+ }
+ }
+
+ resetVRegCycle(SU);
+
+ SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, the cycle was pre-advanced before calling
+ // ReleasePredecessors. In that case, IssueCount should remain 0.
+ //
+ // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+ if (HazardRec->isEnabled() || AvgIPC > 1) {
+ if (SU->getNode() && SU->getNode()->isMachineOpcode())
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+ AdvanceToCycle(CurCycle + 1);
+ }
+}
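+
+// For instance, with the hazard recognizer disabled and -sched-avg-ipc=2 in
+// the issue-count logic above, IssueCount is bumped once per machine-opcode
+// node and the cycle is advanced after every second issued instruction,
+// crudely modeling a dual-issue pipeline (AdvanceToCycle resets IssueCount
+// to zero).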
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ if (PredSU->isAvailable) {
+ PredSU->isAvailable = false;
+ if (!PredSU->isPending)
+ AvailableQueue->remove(PredSU);
+ }
+
+ assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumSuccsLeft will overflow!");
+ ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
+/// its predecessor states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+ LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ LLVM_DEBUG(dumpNode(*SU));
+
+ for (SDep &Pred : SU->Preds) {
+ CapturePred(&Pred);
+    if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[Pred.getReg()] = nullptr;
+ LiveRegGens[Pred.getReg()] = nullptr;
+ releaseInterferences(Pred.getReg());
+ }
+ }
+
+ // Reclaim the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+ SUnit *SeqEnd = CallSeqEndForStart[SU];
+ assert(SeqEnd && "Call sequence start/end must be known");
+ assert(!LiveRegDefs[CallResource]);
+ assert(!LiveRegGens[CallResource]);
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = SU;
+ LiveRegGens[CallResource] = SeqEnd;
+ }
+ }
+
+ // Release the special call resource dependence, if this is the end
+ // of a call.
+ if (LiveRegGens[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[CallResource]);
+ assert(LiveRegGens[CallResource]);
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = nullptr;
+ LiveRegGens[CallResource] = nullptr;
+ releaseInterferences(CallResource);
+ }
+ }
+
+ for (auto &Succ : SU->Succs) {
+ if (Succ.isAssignedRegDep()) {
+ auto Reg = Succ.getReg();
+ if (!LiveRegDefs[Reg])
+ ++NumLiveRegs;
+ // This becomes the nearest def. Note that an earlier def may still be
+ // pending if this is a two-address node.
+ LiveRegDefs[Reg] = SU;
+
+      // Update LiveRegGens only if it was empty before this unscheduling.
+      // This avoids incorrectly updating an entry left over from a previous
+      // run.
+ if (!LiveRegGens[Reg]) {
+ // Find the successor with the lowest height.
+ LiveRegGens[Reg] = Succ.getSUnit();
+ for (auto &Succ2 : SU->Succs) {
+ if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
+ Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
+ LiveRegGens[Reg] = Succ2.getSUnit();
+ }
+ }
+ }
+ }
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
+
+ SU->setHeightDirty();
+ SU->isScheduled = false;
+ SU->isAvailable = true;
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
+ AvailableQueue->unscheduledNode(SU);
+}
+
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
+ std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (auto E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
+
+/// BacktrackBottomUp - Backtrack scheduling, unscheduling nodes up to and
+/// including BtSU, in order to make it possible to schedule SU.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
+ Sequence.pop_back();
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
+ UnscheduleNodeBottomUp(OldSU);
+ AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
+ }
+
+ assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
+ ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isOperandOf(N))
+ return true;
+ }
+ return false;
+}
+
+/// TryUnfoldSU - Attempt to unfold the folded memory operand of SU's node.
+/// Returns the new compute SUnit on success, SU itself if the unfolded node
+/// was already scheduled, or null if unfolding is not possible.
+SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
+  SDNode *N = SU->getNode();
+ SmallVector<SDNode *, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return nullptr;
+
+  // Unfolding an x86 DEC64m operation results in a store, a dec, and a load,
+  // which can't be handled here, so quit.
+ if (NewNodes.size() == 3)
+ return nullptr;
+
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+
+  // LoadNode may already exist. This can happen when there is another
+  // load from the same location that produces the same type of value
+  // but has different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ // If LoadSU has already been scheduled, we should clone it but
+ // this would negate the benefit to unfolding so just return SU.
+ if (LoadSU->isScheduled)
+ return SU;
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
+ computeLatency(LoadSU);
+ }
+
+ bool isNewN = true;
+ SUnit *NewSU;
+ // This can only happen when isNewLoad is false.
+ if (N->getNodeId() != -1) {
+ NewSU = &SUnits[N->getNodeId()];
+ // If NewSU has already been scheduled, we need to clone it, but this
+ // negates the benefit to unfolding so just return SU.
+ if (NewSU->isScheduled) {
+ return SU;
+ }
+ isNewN = false;
+ } else {
+ NewSU = CreateNewSUnit(N);
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ computeLatency(NewSU);
+ }
+
+ LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+
+ // Now that we are committed to unfolding replace DAG Uses.
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
+ SDValue(LoadNode, 1));
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ ChainPreds.push_back(Pred);
+ else if (isOperandOf(Pred.getSUnit(), LoadNode))
+ LoadPreds.push_back(Pred);
+ else
+ NodePreds.push_back(Pred);
+ }
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ ChainSuccs.push_back(Succ);
+ else
+ NodeSuccs.push_back(Succ);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (const SDep &Pred : ChainPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPredQueued(LoadSU, Pred);
+ }
+ for (const SDep &Pred : LoadPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPredQueued(LoadSU, Pred);
+ }
+ for (const SDep &Pred : NodePreds) {
+ RemovePred(SU, Pred);
+ AddPredQueued(NewSU, Pred);
+ }
+ for (SDep D : NodeSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPredQueued(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
+ !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
+ }
+ for (SDep D : ChainSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPredQueued(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ SDep D(LoadSU, SDep::Data, 0);
+ D.setLatency(LoadSU->Latency);
+ AddPredQueued(NewSU, D);
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ if (isNewN)
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0)
+ NewSU->isAvailable = true;
+
+ return NewSU;
+}
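+
+// As a concrete, target-specific illustration: on x86, a load-folded
+// arithmetic node such as ADD32rm may unfold into a MOV32rm load plus an
+// ADD32rr compute node. The load SUnit then carries the chain/memory edges
+// while the new compute SUnit carries the data dependence on the loaded
+// value, which is exactly the split performed above.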
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ if (!N)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
+ LLVM_DEBUG(dumpNode(*SU));
+
+ if (N->getGluedNode() &&
+ !TII->canCopyGluedNodeDuringSchedule(N)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Giving up because it has incoming glue and the target does not "
+ "want to copy it\n");
+ return nullptr;
+ }
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue) {
+ LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
+ return nullptr;
+ } else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (const SDValue &Op : N->op_values()) {
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
+ LLVM_DEBUG(
+ dbgs() << "Giving up because it one of the operands is glue and "
+ "the target does not want to copy it\n");
+ return nullptr;
+ }
+ }
+
+ // If possible unfold instruction.
+ if (TryUnfold) {
+ SUnit *UnfoldSU = TryUnfoldSU(SU);
+ if (!UnfoldSU)
+ return nullptr;
+ SU = UnfoldSU;
+ N = SU->getNode();
+ // If this can be scheduled don't bother duplicating and just return
+ if (SU->NumSuccsLeft == 0)
+ return SU;
+ }
+
+ LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
+ NewSU = CreateClone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SDep &Pred : SU->Preds)
+ if (!Pred.isArtificial())
+ AddPredQueued(NewSU, Pred);
+
+ // Make sure the clone comes after the original. (InstrEmitter assumes
+ // this ordering.)
+ AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = Succ;
+ D.setSUnit(NewSU);
+ AddPredQueued(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (auto &DelDep : DelDeps)
+ RemovePred(DelDep.first, DelDep.second);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVectorImpl<SUnit*> &Copies) {
+ SUnit *CopyFromSU = CreateNewSUnit(nullptr);
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = CreateNewSUnit(nullptr);
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = Succ;
+ D.setSUnit(CopyToSU);
+ AddPredQueued(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, Succ));
+ }
+ else {
+ // Avoid scheduling the def-side copy before other successors. Otherwise
+ // we could introduce another physreg interference on the copy and
+ // continue inserting copies indefinitely.
+ AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+ }
+ }
+ for (auto &DelDep : DelDeps)
+ RemovePred(DelDep.first, DelDep.second);
+
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPredQueued(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPredQueued(CopyToSU, ToDep);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(CopyFromSU);
+ AvailableQueue->addNode(CopyToSU);
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
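+
+// The resulting shape, sketched for a value that must cross from SrcRC
+// through DestRC and back:
+//
+//   SU --(data, Reg)--> CopyFromSU [SrcRC -> DestRC]
+//                           --(data)--> CopyToSU [DestRC -> SrcRC]
+//                                           --> already-scheduled successors
+//
+// Unscheduled successors instead receive an artificial edge from CopyFromSU,
+// which keeps the def-side copy from being pushed past them and re-triggering
+// the same physreg interference.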
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ unsigned NumRes;
+ if (N->getOpcode() == ISD::CopyFromReg) {
+ // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
+ NumRes = 1;
+ } else {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ NumRes = MCID.getNumDefs();
+ for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ }
+ return N->getSimpleValueType(NumRes);
+}
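+
+// Example of the implicit-def scan above: for a machine node with one
+// explicit def and implicit defs {EFLAGS, EAX} (a hypothetical x86-like
+// descriptor), a query for EAX scans past EFLAGS and yields result index
+// getNumDefs() + 1 = 2, i.e. the node's third value type.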
+
+/// CheckForLiveRegDef - Update the live register vector if the specified
+/// register def of the specified SUnit clobbers any "live" registers.
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ SUnit **LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVectorImpl<unsigned> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
+
+    // Check if this alias of Reg is live.
+ if (!LiveRegDefs[*AliasI]) continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AliasI] == SU) continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AliasI).second) {
+ LRegs.push_back(*AliasI);
+ }
+ }
+}
+
+/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
+/// by RegMask, and add them to LRegs.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+ ArrayRef<SUnit*> LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVectorImpl<unsigned> &LRegs) {
+ // Look at all live registers. Skip Reg0 and the special CallResource.
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
+ if (!LiveRegDefs[i]) continue;
+ if (LiveRegDefs[i] == SU) continue;
+ if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
+ if (RegAdded.insert(i).second)
+ LRegs.push_back(i);
+ }
+}
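+
+// Register masks typically come from call nodes: a call clobbers every
+// physical register its calling convention does not preserve, so any live
+// def whose register is not in the mask's preserved set is reported as an
+// interference by the mask check above.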
+
+/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
+static const uint32_t *getNodeRegMask(const SDNode *N) {
+ for (const SDValue &Op : N->op_values())
+ if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
+ return RegOp->getRegMask();
+ return nullptr;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
+ CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
+ RegAdded, LRegs, TRI);
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM ||
+ Node->getOpcode() == ISD::INLINEASM_BR) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (Register::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ // If we're in the middle of scheduling a call, don't begin scheduling
+ // another call. Also, don't allow any physical registers to be live across
+ // the call.
+ if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
+ // Check the special calling-sequence resource.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource]) {
+ SDNode *Gen = LiveRegGens[CallResource]->getNode();
+ while (SDNode *Glued = Gen->getGluedNode())
+ Gen = Glued;
+ if (!IsChainDependent(Gen, Node, 0, TII) &&
+ RegAdded.insert(CallResource).second)
+ LRegs.push_back(CallResource);
+ }
+ }
+ if (const uint32_t *RegMask = getNodeRegMask(Node))
+ CheckForLiveRegDefMasked(SU, RegMask,
+ makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
+ RegAdded, LRegs);
+
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (MCID.hasOptionalDef()) {
+ // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
+ // This operand can be either a def of CPSR, if the S bit is set; or a use
+ // of %noreg. When the OptionalDef is set to a valid register, we need to
+ // handle it in the same way as an ImplicitDef.
+ for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
+ if (MCID.OpInfo[i].isOptionalDef()) {
+ const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
+ unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ }
+ }
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ }
+
+ return !LRegs.empty();
+}
+
+void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = Interferences.size(); i > 0; --i) {
+ SUnit *SU = Interferences[i-1];
+ LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
+ if (Reg) {
+ SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
+ if (!is_contained(LRegs, Reg))
+ continue;
+ }
+ SU->isPending = false;
+ // The interfering node may no longer be available due to backtracking.
+ // Furthermore, it may have been made available again, in which case it is
+ // now already in the AvailableQueue.
+ if (SU->isAvailable && !SU->NodeQueueId) {
+ LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
+ AvailableQueue->push(SU);
+ }
+ if (i < Interferences.size())
+ Interferences[i-1] = Interferences.back();
+ Interferences.pop_back();
+ LRegsMap.erase(LRegsPos);
+ }
+}
+
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
+ auto FindAvailableNode = [&]() {
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ LLVM_DEBUG(dbgs() << " Interfering reg ";
+ if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
+ else dbgs() << printReg(LRegs[0], TRI);
+ dbgs() << " SU #" << CurSU->NodeNum << '\n');
+ std::pair<LRegsMapT::iterator, bool> LRegsPair =
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ if (LRegsPair.second) {
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+      } else {
+ assert(CurSU->isPending && "Interferences are pending");
+ // Update the interference with current live regs.
+ LRegsPair.first->second = LRegs;
+ }
+ CurSU = AvailableQueue->pop();
+ }
+ };
+ FindAvailableNode();
+ if (CurSU)
+ return CurSU;
+
+ // We query the topological order in the loop body, so make sure outstanding
+ // updates are applied before entering it (we only enter the loop if there
+ // are some interferences). If we make changes to the ordering, we exit
+ // the loop.
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (SUnit *TrySU : Interferences) {
+ SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = nullptr;
+ unsigned LiveCycle = std::numeric_limits<unsigned>::max();
+ for (unsigned Reg : LRegs) {
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ // BacktrackBottomUp mutates Interferences!
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
+ << ") to SU(" << TrySU->NodeNum << ")\n");
+ AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
+
+ // If one or more successors has been unscheduled, then the current
+ // node is no longer available.
+ if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
+ LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
+ CurSU = AvailableQueue->pop();
+ } else {
+ LLVM_DEBUG(dbgs() << "TrySU available\n");
+ // Available and in AvailableQueue
+ AvailableQueue->remove(TrySU);
+ CurSU = TrySU;
+ }
+ FindAvailableNode();
+ // Interferences has been mutated. We must break.
+ break;
+ }
+ }
+
+ if (!CurSU) {
+    // Can't backtrack. If it's too expensive to copy the value, then try
+    // to duplicate the nodes that produce these "too expensive to copy"
+    // values to break the dependency. In case even that doesn't work,
+    // insert cross class copies.
+    // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If the cross copy register class is the same as RC, then it must be
+    // possible to copy the value directly. Do not try to duplicate the def.
+    // If the cross copy register class is not the same as RC, then it's
+    // possible to copy the value, but it requires cross register class
+    // copies, which are expensive.
+    // If the cross copy register class is null, then it's not possible to
+    // copy the value at all.
+ SUnit *NewDef = nullptr;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
+ NewDef = Copies.back();
+ }
+
+ LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+ return CurSU;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue->push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !Interferences.empty()) {
+ LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
+ AvailableQueue->dump(this));
+
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
+
+ AdvancePastStalls(SU);
+
+ ScheduleNodeBottomUp(SU);
+
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
+ "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
+ }
+ }
+
+ // Reverse the order if it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+namespace {
+
+class RegReductionPQBase;
+
+struct queue_sort {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
+/// bu_ls_rr_sort - Priority function for bottom up register pressure
+/// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+
+ src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+
+ hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+
+ ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit *> Queue;
+ unsigned CurQueueId = 0;
+ bool TracksRegPressure;
+ bool SrcOrder;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ ScheduleDAGRRList *scheduleDAG = nullptr;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ bool srcorder,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
+ SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits) override;
+
+ void addNode(const SUnit *SU) override;
+
+ void updateNode(const SUnit *SU) override;
+
+ void releaseState() override {
+ SUnits = nullptr;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ if (!SU->getNode()) return 0;
+
+ return SU->getNode()->getIROrder();
+ }
+
+ bool empty() const override { return Queue.empty(); }
+
+ void push(SUnit *U) override {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) override {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
+ if (I != std::prev(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const override { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU) const;
+
+ int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
+
+ void scheduledNode(SUnit *SU) override;
+
+ void unscheduledNode(SUnit *SU) override;
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != std::prev(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+}
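+
+// Note on the idiom above: swapping Best with the back element and popping
+// removes it in O(1). Queue order need not be preserved because the Picker
+// re-scans the whole vector on every pop; the queue is an unordered pool.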
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
+ }
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ bool srcorder,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
+ tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const override { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const override {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() override {
+ if (Queue.empty()) return nullptr;
+
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit *> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ DAG->dumpNode(*SU);
+ }
+ }
+#endif
+};
+
+using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
+using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
+using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
+using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+ bool LSchedLow = left->isScheduleLow;
+ bool RSchedLow = right->isScheduleLow;
+ if (LSchedLow != RSchedLow)
+ return LSchedLow < RSchedLow ? 1 : -1;
+ return 0;
+}
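+
+// For example (illustrative): if left is a TokenFactor (isScheduleLow) and
+// right is not, this returns -1, i.e. right is scheduled above left and the
+// TokenFactor sinks toward the bottom of the schedule.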
+
+/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
+/// Smaller number is the higher priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ if (SUNumbers[SU->NodeNum] != 0)
+ return SUNumbers[SU->NodeNum];
+
+ // Use WorkList to avoid stack overflow on excessively large IRs.
+ struct WorkState {
+ WorkState(const SUnit *SU) : SU(SU) {}
+ const SUnit *SU;
+ unsigned PredsProcessed = 0;
+ };
+
+ SmallVector<WorkState, 16> WorkList;
+ WorkList.push_back(SU);
+ while (!WorkList.empty()) {
+ auto &Temp = WorkList.back();
+ auto *TempSU = Temp.SU;
+ bool AllPredsKnown = true;
+ // Try to find a non-evaluated pred and push it into the processing stack.
+ for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
+ auto &Pred = TempSU->Preds[P];
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ if (SUNumbers[PredSU->NodeNum] == 0) {
+#ifndef NDEBUG
+ // In debug mode, check that we don't have such element in the stack.
+ for (auto It : WorkList)
+ assert(It.SU != PredSU && "Trying to push an element twice?");
+#endif
+ // Next time start processing this one starting from the next pred.
+ Temp.PredsProcessed = P + 1;
+ WorkList.push_back(PredSU);
+ AllPredsKnown = false;
+ break;
+ }
+ }
+
+ if (!AllPredsKnown)
+ continue;
+
+ // Once all preds are known, we can calculate the answer for this one.
+ unsigned SethiUllmanNumber = 0;
+ unsigned Extra = 0;
+ for (const SDep &Pred : TempSU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
+ assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+ SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
+ WorkList.pop_back();
+ }
+
+ assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
+ return SUNumbers[SU->NodeNum];
+}
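+
+// A small worked example of the computation above (illustrative only):
+//
+//    a   b      Leaves have no data preds, so a and b get the floor value 1.
+//     \ /       Node c has two preds whose numbers tie at the max (1), so an
+//      c   d    extra register is needed to hold the first result while the
+//       \ /     second subtree is evaluated: c = 1 + 1 = 2. Node d is a leaf
+//        e      (1); e's preds are {2, 1} with a unique max, so e = 2.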
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (const SUnit &SU : *SUnits)
+ CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+    // If SU does not have a register use, i.e. it doesn't produce a value
+    // that would be consumed (e.g. store), then it terminates a chain of
+    // computation. Give it a large SethiUllman number so it will be
+    // scheduled right before its predecessors, so that it doesn't lengthen
+    // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+#if 1
+ return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
+ << RegLimit[Id] << '\n');
+ }
+}
+#endif
+
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ }
+ return false;
+}
+
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
+ const SDNode *N = SU->getNode();
+
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
+
+// Compute the register pressure contribution by this instruction by counting
+// up for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+ LiveUses = 0;
+ int PDiff = 0;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ if (PredSU->getNode()->isMachineOpcode())
+ ++LiveUses;
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ MVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ ++PDiff;
+ }
+ }
+ const SDNode *N = SU->getNode();
+
+ if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+ return PDiff;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ --PDiff;
+ }
+ return PDiff;
+}
+
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ if (!SU->getNode())
+ return;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+    // defs in PredSU. They can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+ RegPressure[RCId] += Cost;
+ break;
+ }
+ }
+
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+ if (RegPressure[RCId] < Cost) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
+ << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= Cost;
+ }
+ }
+ LLVM_DEBUG(dumpRegPressure());
+}
+
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N) return;
+
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ MVT VT = PN->getSimpleValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+ POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ MVT VT = PN->getSimpleValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = PN->getSimpleValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ LLVM_DEBUG(dumpRegPressure());
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl()) continue; // ignore chain succs
+ unsigned Height = Succ.getSUnit()->getHeight();
+    // If there is a bunch of CopyToRegs stacked up, they should be considered
+    // to be at the same position.
+ if (Succ.getSUnit()->getNode() &&
+ Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(Succ.getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
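+
+// Example (illustrative): if SU's only data successor is a CopyToReg whose
+// own closest successor sits at height 5, closestSucc(SU) reports 6; the
+// stacked copy is treated as sitting directly below its use rather than at
+// its own, possibly stale, height.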
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+ bool RetVal = false;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue;
+ const SUnit *PredSU = Pred.getSUnit();
+ if (PredSU->getNode() &&
+ PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (Register::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl()) continue;
+ const SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (Register::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+// Set isVRegCycle for a node with only live in opers and live out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable whose operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+ if (DisableSchedVRegCycle)
+ return;
+
+ if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+ return;
+
+ LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+ SU->isVRegCycle = true;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue;
+ Pred.getSUnit()->isVRegCycle = true;
+ }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+ if (!SU->isVRegCycle)
+ return;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ if (PredSU->isVRegCycle) {
+ assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+ "VRegCycle def must be CopyFromReg");
+ Pred.getSUnit()->isVRegCycle = false;
+ }
+ }
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+ // If this SU also defines the VReg, don't hoist it as a "use".
+ if (SU->isVRegCycle)
+ return false;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ if (Pred.getSUnit()->isVRegCycle &&
+ Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ return true;
+ }
+ }
+ return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+ int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+ int LHeight = (int)left->getHeight() + LPenalty;
+ int RHeight = (int)right->getHeight() + RPenalty;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
+ BUHasStall(right, RHeight, SPQ);
+
+  // If scheduling one of the nodes will cause a pipeline stall, delay it.
+  // If scheduling both of them would cause stalls, sort them according to
+  // their height.
+ if (LStall) {
+ if (!RStall)
+ return 1;
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ } else if (RStall)
+ return -1;
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+ right->SchedulingPref == Sched::ILP)) {
+    // If neither instruction stalls (!LStall && !RStall) and the
+    // HazardRecognizer is enabled, instructions are already grouped by
+    // cycle, so height is covered and only depth matters. We also reach
+    // this point if both stall but have the same height.
+ if (!SPQ->getHazardRec()->isEnabled()) {
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ }
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
+ if (LDepth != RDepth) {
+ LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+ // Schedule physical register definitions close to their use. This is
+ // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+ // long as shortening physreg live ranges is generally good, we can defer
+ // creating a subtarget hook.
+ if (!DisableSchedPhysRegJoin) {
+ bool LHasPhysReg = left->hasPhysRegDefs;
+ bool RHasPhysReg = right->hasPhysRegDefs;
+ if (LHasPhysReg != RHasPhysReg) {
+ #ifndef NDEBUG
+ static const char *const PhysRegMsg[] = { " has no physreg",
+ " defines a physreg" };
+ #endif
+ LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
+ << ") " << PhysRegMsg[RHasPhysReg] << "\n");
+ return LHasPhysReg < RHasPhysReg;
+ }
+ }
+
+  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+
+ // Be really careful about hoisting call operands above previous calls.
+  // Only allow it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+  // If one or both of the nodes are calls and their Sethi-Ullman numbers
+  // are the same, keep source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+    // Prefer the node with the lower non-zero order number; an order number
+    // of zero means no preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
+
+ // Try schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist)
+ return LDist < RDist;
+
+  // How many registers become live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch)
+ return LScratch > RScratch;
+
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
+
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+  } else {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
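+
+// Recap of the tie-breaking order above: physreg defs first, then
+// Sethi-Ullman priority (adjusted for call operands), source order when
+// calls are involved, distance to the closest use, scratch register count,
+// latency (only when neither node is a call; otherwise height and depth),
+// and finally NodeQueueId as the ultimate tie breaker.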
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+  // Prefer the node with the lower non-zero order number; an order number
+  // of zero means no preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+ return BURRSort(left, right, SPQ);
+}
+
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
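+
+// For instance (illustrative): with ReadyDelay = 3, an SU of height 10 that
+// cannot reduce register pressure is kept out of the ready queue until
+// cycle 7, the first cycle where 10 <= CurCycle + ReadyDelay holds.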
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh) {
+ LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
+ return true;
+  } else if (!LHigh && RHigh) {
+ LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
+ return false;
+ }
+ if (!LHigh && !RHigh) {
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ return BURRSort(left, right, SPQ);
+}
+
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+static bool canEnableCoalescing(SUnit *SU) {
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return true;
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return true;
+
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return true;
+
+ return false;
+}
+
+// list-ilp is currently an experimental scheduler that allows various
+// heuristics to be enabled prior to the normal register reduction logic.
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ unsigned LLiveUses = 0, RLiveUses = 0;
+ int LPDiff = 0, RPDiff = 0;
+ if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
+ LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
+ RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
+ }
+ if (!DisableSchedRegPressure && LPDiff != RPDiff) {
+ LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
+ << "): " << LPDiff << " != SU(" << right->NodeNum
+ << "): " << RPDiff << "\n");
+ return LPDiff > RPDiff;
+ }
+
+ if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
+ bool LReduce = canEnableCoalescing(left);
+ bool RReduce = canEnableCoalescing(right);
+ if (LReduce && !RReduce) return false;
+ if (RReduce && !LReduce) return true;
+ }
+
+ if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
+ LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses
+ << "\n");
+ return LLiveUses < RLiveUses;
+ }
+
+ if (!DisableSchedStalls) {
+ bool LStall = BUHasStall(left, left->getHeight(), SPQ);
+ bool RStall = BUHasStall(right, right->getHeight(), SPQ);
+ if (LStall != RStall)
+ return left->getHeight() > right->getHeight();
+ }
+
+ if (!DisableSchedCriticalPath) {
+ int spread = (int)left->getDepth() - (int)right->getDepth();
+ if (std::abs(spread) > MaxReorderWindow) {
+ LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum
+ << "): " << right->getDepth() << "\n");
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+ int spread = (int)left->getHeight() - (int)right->getHeight();
+ if (std::abs(spread) > MaxReorderWindow)
+ return left->getHeight() > right->getHeight();
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ if (!Disable2AddrHack)
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure && !SrcOrder)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+
+ // For single block loops, mark nodes that look like canonical IV increments.
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
+ for (SUnit &SU : sunits)
+ initVRegCycle(&SU);
+}
+
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// canClobberReachingPhysRegUse - True if SU would clobber one of its
+/// successor's explicit physregs whose definition can reach DepSU.
+/// i.e. DepSU should not be scheduled above SU.
+static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
+ ScheduleDAGRRList *scheduleDAG,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ const MCPhysReg *ImpDefs
+ = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
+ const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+  if (!ImpDefs && !RegMask)
+ return false;
+
+ for (const SDep &Succ : SU->Succs) {
+ SUnit *SuccSU = Succ.getSUnit();
+ for (const SDep &SuccPred : SuccSU->Preds) {
+ if (!SuccPred.isAssignedRegDep())
+ continue;
+
+ if (RegMask &&
+ MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
+ scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
+ return true;
+
+ if (ImpDefs)
+ for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
+ scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
+ return true;
+ }
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ const MCPhysReg *SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ const uint32_t *SURegMask = getNodeRegMask(SUNode);
+ if (!SUImpDefs && !SURegMask)
+ continue;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+ return true;
+ if (!SUImpDefs)
+ continue;
+      for (; *SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+/// N
+/// / |
+/// / |
+/// U store
+/// |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (SUnit &SU : *SUnits) {
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU.NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU.NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU.getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ Register::isVirtualRegister(
+ cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ SDNode *PredFrameSetup = nullptr;
+ for (const SDep &Pred : SU.Preds)
+ if (Pred.isCtrl() && Pred.getSUnit()) {
+        // Find a predecessor that is not a data dependence.
+ SDNode *PredND = Pred.getSUnit()->getNode();
+
+        // If PredND is a FrameSetup, we should not preschedule the node,
+        // or else, when scheduling bottom-up, ADJCALLSTACKDOWN and
+        // ADJCALLSTACKUP may hold the CallResource for too long and
+        // prevent other calls from being scheduled. If there is no other
+        // available node to schedule, the scheduler will try to rename the
+        // register by creating a copy to avoid the conflict, which will
+        // fail because the CallResource is not a real physical register.
+ if (PredND && PredND->isMachineOpcode() &&
+ (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
+ PredFrameSetup = PredND;
+ break;
+ }
+ }
+    // Skip nodes that have a FrameSetup parent.
+ if (PredFrameSetup != nullptr)
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = nullptr;
+ for (const SDep &Pred : SU.Preds)
+ if (!Pred.isCtrl()) {
+ PredSU = Pred.getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU.getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ Register::isVirtualRegister(
+ cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (const SDep &PredSucc : PredSU->Succs) {
+ SUnit *PredSuccSU = PredSucc.getSUnit();
+ if (PredSuccSU == &SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(&SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ LLVM_DEBUG(
+ dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
+ << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != &SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPredQueued(&SU, Edge);
+ Edge.setSUnit(&SU);
+ scheduleDAG->AddPredQueued(SuccSU, Edge);
+ --i;
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address but only one is commutable, favor the one that is
+/// not commutable.
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
+ for (SUnit &SU : *SUnits) {
+ if (!SU.isTwoAddress)
+ continue;
+
+ SDNode *Node = SU.getNode();
+ if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
+ continue;
+
+ bool isLiveOut = hasOnlyLiveOutUses(&SU);
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU.getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU)
+ continue;
+ for (const SDep &Succ : DUSU->Succs) {
+ if (Succ.isCtrl())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU == &SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->getHeight() < SU.getHeight() &&
+ (SU.getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+      // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetOpcode::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
+ SuccOpc == TargetOpcode::INSERT_SUBREG ||
+ SuccOpc == TargetOpcode::SUBREG_TO_REG)
+ continue;
+ if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
+ (!canClobber(SuccSU, DUSU) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
+ (!SU.isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, &SU)) {
+ LLVM_DEBUG(dbgs()
+ << " Adding a pseudo-two-addr edge from SU #"
+ << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
+ }
+ }
+ }
+ }
+}
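+
+// A hypothetical illustration of the edge added above: if "t1 = add t0, t2"
+// is two-address (t1 is tied to t0) and an independent "t3 = sub t0, t4"
+// also reads t0, the artificial edge forces the sub to be scheduled above
+// the add. The add then becomes the last use of t0 and can clobber it in
+// place; otherwise t0 would stay live past the add and the register
+// allocator would have to insert a copy.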
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ BURegReductionPriorityQueue *PQ =
+ new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+ScheduleDAGSDNodes *
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ SrcRegReductionPriorityQueue *PQ =
+ new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetLowering *TLI = IS->TLI;
+
+ HybridBURRPriorityQueue *PQ =
+ new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetLowering *TLI = IS->TLI;
+
+ ILPBURRPriorityQueue *PQ =
+ new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 0000000000000..d4c1fb36475e7
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,1033 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+// This allows the latency-based scheduler to notice high latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+ cl::desc("Roughly estimate the number of cycles that 'long latency'"
+ "instructions take for targets with no itinerary"));
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+ : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
+ InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+ BB = bb;
+ DAG = dag;
+
+ // Clear the scheduler's SUnit DAG.
+ ScheduleDAG::clearDAG();
+ Sequence.clear();
+
+ // Invoke the target's selection of scheduler.
+ Schedule();
+}
+
+/// NewSUnit - Creates a new SUnit and return a ptr to it.
+///
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = nullptr;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.emplace_back(N, (unsigned)SUnits.size());
+ assert((Addr == nullptr || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ SUnit *SU = &SUnits.back();
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ if (!N ||
+ (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
+ SU->SchedulingPref = Sched::None;
+ else
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ return SU;
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+ SUnit *SU = newSUnit(Old->getNode());
+ SU->OrigNode = Old->OrigNode;
+ SU->Latency = Old->Latency;
+ SU->isVRegCycle = Old->isVRegCycle;
+ SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
+ SU->isTwoAddress = Old->isTwoAddress;
+ SU->isCommutable = Old->isCommutable;
+ SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+ SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->isScheduleHigh = Old->isScheduleHigh;
+ SU->isScheduleLow = Old->isScheduleLow;
+ SU->SchedulingPref = Old->SchedulingPref;
+ Old->isCloned = true;
+ return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
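+///
+/// Shape of the pattern matched here (a hypothetical sketch): the user is
+/// "CopyToReg ch, Register:%EFLAGS, val", and the def either is a
+/// CopyFromReg of the same physical register or is a machine node that
+/// implicitly defines it; PhysReg is then set to that register and Cost to
+/// the copy cost of its minimal register class.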
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ unsigned &PhysReg, int &Cost) {
+ if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+ return;
+
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (Register::isVirtualRegister(Reg))
+ return;
+
+ unsigned ResNo = User->getOperand(2).getResNo();
+ if (Def->getOpcode() == ISD::CopyFromReg &&
+ cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) {
+ PhysReg = Reg;
+ } else if (Def->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (ResNo >= II.getNumDefs() &&
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
+ PhysReg = Reg;
+ }
+
+ if (PhysReg != 0) {
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
+}
+
+// Helper for AddGlue to clone node operands.
+static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
+ SDValue ExtraOper = SDValue()) {
+ SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
+ if (ExtraOper.getNode())
+ Ops.push_back(ExtraOper);
+
+ SDVTList VTList = DAG->getVTList(VTs);
+ MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+ // Store memory references.
+ SmallVector<MachineMemOperand *, 2> MMOs;
+ if (MN)
+ MMOs.assign(MN->memoperands_begin(), MN->memoperands_end());
+
+ DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
+
+ // Reset the memory references
+ if (MN)
+ DAG->setNodeMemRefs(MN, MMOs);
+}
+
+static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
+ SDNode *GlueDestNode = Glue.getNode();
+
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return false;
+
+ // Don't add a glue operand to something that already uses glue.
+ if (GlueDestNode &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ return false;
+ }
+ // Don't add glue to something that already has a glue value.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
+
+ SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
+
+ CloneNodeWithValues(N, DAG, VTs, Glue);
+
+ return true;
+}
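+
+// For example (illustrative only): morphing "t: v = LD ch, ptr" with an
+// added glue result and an incoming glue operand yields
+// "t: v, glue = LD ch, ptr, inglue", tying the load to its glue predecessor
+// while preserving its memory operands.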
+
+// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
+// node even though simply shrinking the value list is sufficient.
+static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
+ assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
+ !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
+ "expected an unused glue value");
+
+ CloneNodeWithValues(N, DAG,
+ makeArrayRef(N->value_begin(), N->getNumValues() - 1));
+}
+
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
+/// This function finds loads of the same base and different offsets. If the
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
+/// outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
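+///
+/// Sketch (offsets invented): two loads "LD [base+0]" and "LD [base+8]" on
+/// the same chain become glued as LD0 --glue--> LD8, so the scheduler must
+/// emit them adjacently and in increasing-offset order.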
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDNode *Chain = nullptr;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ return;
+
+ // Skip any load instruction that has a tied input. There may be an additional
+ // dependency requiring a different order than by increasing offsets, and the
+ // added glue may introduce a cycle.
+ auto hasTiedInput = [this](const SDNode *N) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned I = 0; I != MCID.getNumOperands(); ++I) {
+ if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1)
+ return true;
+ }
+
+ return false;
+ };
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
+ SmallPtrSet<SDNode*, 16> Visited;
+ SmallVector<int64_t, 4> Offsets;
+ DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
+ bool Cluster = false;
+ SDNode *Base = Node;
+
+ if (hasTiedInput(Base))
+ return;
+
+ // This algorithm requires a reasonably low use count before finding a match
+ // to avoid uselessly blowing up compile time in large blocks.
+ unsigned UseCount = 0;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E && UseCount < 100; ++I, ++UseCount) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User).second)
+ continue;
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2 ||
+ hasTiedInput(User)) {
+ // FIXME: Should be ok if the addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
+ continue;
+ }
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ // Reset UseCount to allow more matches.
+ UseCount = 0;
+ }
+
+ if (!Cluster)
+ return;
+
+ // Sort them in increasing order.
+ llvm::sort(Offsets);
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
+
+ if (NumLoads == 0)
+ return;
+
+ // Cluster loads by adding MVT::Glue outputs and inputs. This also
+ // ensures they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ SDValue InGlue = SDValue(nullptr, 0);
+ if (AddGlue(Lead, InGlue, true, DAG))
+ InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutGlue = I < E - 1;
+ SDNode *Load = Loads[I];
+
+ // If AddGlue fails, we could leave an unused glue value. This should not
+ // cause any problems; the dangling glue output is removed below via
+ // RemoveUnusedGlue.
+ if (AddGlue(Load, InGlue, OutGlue, DAG)) {
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+ else if (!OutGlue && InGlue.getNode())
+ RemoveUnusedGlue(InGlue.getNode(), DAG);
+ }
+}
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SDNode &NI : DAG->allnodes()) {
+ SDNode *Node = &NI;
+ if (!Node || !Node->isMachineOpcode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
+ }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SDNode &NI : DAG->allnodes()) {
+ NI.setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+ // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
+ // invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Add all nodes in depth first order.
+ SmallVector<SDNode*, 64> Worklist;
+ SmallPtrSet<SDNode*, 32> Visited;
+ Worklist.push_back(DAG->getRoot().getNode());
+ Visited.insert(DAG->getRoot().getNode());
+
+ SmallVector<SUnit*, 8> CallSUnits;
+ while (!Worklist.empty()) {
+ SDNode *NI = Worklist.pop_back_val();
+
+ // Add all operands to the worklist unless they've already been added.
+ for (const SDValue &Op : NI->op_values())
+ if (Visited.insert(Op.getNode()).second)
+ Worklist.push_back(Op.getNode());
+
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = newSUnit(NI);
+
+ // See if anything is glued to this node, if so, add them to glued
+ // nodes. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
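+ //
+ // For instance (hypothetical): a CALL is typically glued to the
+ // CopyToReg nodes that set up its arguments; the whole glued chain
+ // CopyToReg -> ... -> CALL is folded into one SUnit here, keyed to the
+ // bottom-most node of the sequence.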
+
+ // Scan up to find glued preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ }
+
+ // Scan down to find any glued succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (GlueVal.isOperandOf(*UI)) {
+ HasGlueUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = *UI;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ break;
+ }
+ if (!HasGlueUse) break;
+ }
+
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
+ // Schedule zero-latency TokenFactor below any nodes that may increase the
+ // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+ // have false stalls.
+ if (NI->getOpcode() == ISD::TokenFactor)
+ NodeSUnit->isScheduleLow = true;
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ computeLatency(NodeSUnit);
+ }
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = forceUnitLatencies();
+
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode() &&
+ TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ SU->hasPhysRegClobbers = true;
+ unsigned NumUsed = InstrEmitter::CountResults(N);
+ while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+ --NumUsed; // Skip over unused values at the end.
+ if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+ SU->hasPhysRegDefs = true;
+ }
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).getNode();
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = &SUnits[OpN->getNodeId()];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == SU) continue; // In the same group.
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ unsigned PhysReg = 0;
+ int Cost = 1;
+ // Determine if this is a physical register dependency.
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ assert((PhysReg == 0 || !isChain) &&
+ "Chain dependence via physreg data?");
+ // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
+ // emits a copy from the physical register to a virtual register unless
+ // it requires a cross class copy (cost < 0). That means we are only
+ // treating "expensive to copy" register dependency as physical register
+ // dependency. This may change in the future though.
+ if (Cost >= 0 && !StressSched)
+ PhysReg = 0;
+
+ // If this is a ctrl dep, latency is 1.
+ unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ // Special-case TokenFactor chains as zero-latency.
+ if (isChain && OpN->getOpcode() == ISD::TokenFactor)
+ OpLatency = 0;
+
+ SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
+ : SDep(OpSU, SDep::Data, PhysReg);
+ Dep.setLatency(OpLatency);
+ if (!isChain && !UnitLatencies) {
+ computeOperandLatency(OpN, N, i, Dep);
+ ST.adjustSchedDependency(OpSU, SU, Dep);
+ }
+
+ if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ //
+ // We can't tell (without more book-keeping) if this results from
+ // glued nodes or duplicate operands. As long as we don't reduce
+ // NumRegDefsLeft to zero, we handle the common cases well.
+ --OpSU->NumRegDefsLeft;
+ }
+ }
+ }
+ }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection DAG we are
+/// given as input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// glued together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) {
+ // Cluster certain nodes which should be scheduled together.
+ ClusterNodes();
+ // Populate the SUnits array.
+ BuildSchedUnits();
+ // Compute all the scheduling dependencies between nodes.
+ AddSchedEdges();
+}
+
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ // Check for phys reg copy.
+ if (!Node)
+ return;
+
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ if (POpc == TargetOpcode::PATCHPOINT &&
+ Node->getValueType(0) == MVT::Other) {
+ // PATCHPOINT is defined to have one result, but it might really have none
+ // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
+ // real definition.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ while (Node) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ ValueType = Node->getSimpleValueType(DefIdx);
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (!Node) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
+
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
+ SDNode *N = SU->getNode();
+
+ // TokenFactor operands are considered zero latency, and some schedulers
+ // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+ // whenever node latency is nonzero.
+ if (N && N->getOpcode() == ISD::TokenFactor) {
+ SU->Latency = 0;
+ return;
+ }
+
+ // Check to see if the scheduler cares about latencies.
+ if (forceUnitLatencies()) {
+ SU->Latency = 1;
+ return;
+ }
+
+ if (!InstrItins || InstrItins->isEmpty()) {
+ if (N && N->isMachineOpcode() &&
+ TII->isHighLatencyDef(N->getMachineOpcode()))
+ SU->Latency = HighLatencyCycles;
+ else
+ SU->Latency = 1;
+ return;
+ }
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes glued together into this SUnit.
+ SU->Latency = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
+}
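+
+// For example (hypothetical itinerary): a glued pair {CMP, Bcc} folded into
+// a single SUnit is assigned Latency = latency(CMP) + latency(Bcc), since
+// the glued group issues as an indivisible unit.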
+
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const{
+ // Check to see if the scheduler cares about latencies.
+ if (forceUnitLatencies())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (Register::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+ // latency so as not to penalize the def.
+ // FIXME: need target specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
+ }
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+}
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dumpNodeName(SU);
+ dbgs() << ": ";
+
+ if (!SU.getNode()) {
+ dbgs() << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU.getNode()->dump(DAG);
+ dbgs() << "\n";
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ dbgs() << " ";
+ GluedNodes.back()->dump(DAG);
+ dbgs() << "\n";
+ GluedNodes.pop_back();
+ }
+#endif
+}
+
+void ScheduleDAGSDNodes::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getNode() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits)
+ dumpNodeAll(SU);
+ if (ExitSU.getNode() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGSDNodes::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i) {
+ if (SUnit *SU = Sequence[i])
+ dumpNode(*SU);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+#endif
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+ unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void
+ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
+ SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
+ DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) {
+ if (!N->getHasDebugValue())
+ return;
+
+ // Opportunistically insert immediate dbg_value uses, i.e. those with the same
+ // source order number as N.
+ MachineBasicBlock *BB = Emitter.getBlock();
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ for (auto DV : DAG->GetDbgValues(N)) {
+ if (DV->isEmitted())
+ continue;
+ unsigned DVOrder = DV->getOrder();
+ if (!Order || DVOrder == Order) {
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
+ if (DbgMI) {
+ Orders.push_back({DVOrder, DbgMI});
+ BB->insert(InsertPos, DbgMI);
+ }
+ }
+ }
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void
+ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders,
+ SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) {
+ unsigned Order = N->getIROrder();
+ if (!Order || Seen.count(Order)) {
+ // Process any valid SDDbgValues even if the node does not have any order
+ // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+ return;
+ }
+
+ // If a new instruction was generated for this Order number, record it.
+ // Otherwise, leave this order number unseen: we will either find later
+ // instructions for it, or leave it unseen if there were no instructions at
+ // all.
+ if (NewInsn) {
+ Seen.insert(Order);
+ Orders.push_back({Order, NewInsn});
+ }
+
+ // Even if no instruction was generated, a Value may have become defined via
+ // earlier nodes. Try to process them now.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain succs
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to the returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+ SmallSet<unsigned, 8> Seen;
+ bool HasDbg = DAG->hasDebugValues();
+
+ // Emit a node, and determine where its first instruction is for debuginfo.
+ // Zero, one, or multiple instructions can be created when emitting a node.
+ auto EmitNode =
+ [&](SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * {
+ // Fetch the instruction prior to this one, or end() if nonexistent.
+ auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
+ if (I == BB->begin())
+ return BB->end();
+ else
+ return std::prev(I);
+ };
+
+ MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos());
+ Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap);
+ MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos());
+
+ // If the iterator did not change, no instructions were inserted.
+ if (Before == After)
+ return nullptr;
+
+ MachineInstr *MI;
+ if (Before == BB->end()) {
+ // There were no prior instructions; the new ones must start at the
+ // beginning of the block.
+ MI = &Emitter.getBlock()->instr_front();
+ } else {
+ // Return first instruction after the pre-existing instructions.
+ MI = &*std::next(Before);
+ }
+
+ if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues)
+ MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
+
+ return MI;
+ };
+
+ // If this is the first BB, emit byval parameter dbg_value's.
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+ SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+ SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+ for (; PDI != PDE; ++PDI) {
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
+ if (DbgMI) {
+ BB->insert(InsertPos, DbgMI);
+ // We re-emit the dbg_value closer to its use, too, after instructions
+ // are emitted to the BB.
+ (*PDI)->clearIsEmitted();
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ TII->insertNoop(*Emitter.getBlock(), InsertPos);
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any glued SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
+
+ if (MDNode *MD = DAG->getHeapAllocSite(N)) {
+ if (NewInsn && NewInsn->isCall())
+ MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ }
+
+ GluedNodes.pop_back();
+ }
+ auto NewInsn =
+ EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
+ NewInsn);
+ if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
+ if (NewInsn && NewInsn->isCall())
+ MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ }
+ }
+
+ // Insert all the dbg_values which have not already been inserted in source
+ // order sequence.
+ if (HasDbg) {
+ MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
+
+ // Sort the source order instructions and use the order to insert debug
+ // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
+ // regardless of the host's implementation of std::sort.
+ llvm::stable_sort(Orders, less_first());
+ std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
+ [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
+ return LHS->getOrder() < RHS->getOrder();
+ });
+
+ SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
+ SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
+ // Now emit the rest according to source order.
+ unsigned LastOrder = 0;
+ for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
+ unsigned Order = Orders[i].first;
+ MachineInstr *MI = Orders[i].second;
+ // Insert all SDDbgValue's whose order(s) are before "Order".
+ assert(MI);
+ for (; DI != DE; ++DI) {
+ if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
+ break;
+ if ((*DI)->isEmitted())
+ continue;
+
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(Pos, DbgMI);
+ }
+ }
+ }
+ LastOrder = Order;
+ }
+ // Add trailing DbgValue's before the terminator. FIXME: May want to add
+ // some of them before one or more conditional branches?
+ SmallVector<MachineInstr*, 8> DbgMIs;
+ for (; DI != DE; ++DI) {
+ if ((*DI)->isEmitted())
+ continue;
+ assert((*DI)->getOrder() >= LastOrder &&
+ "emitting DBG_VALUE out of order");
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
+ }
+
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+ InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
+
+ SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin();
+ SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd();
+ // Now emit the rest according to source order.
+ LastOrder = 0;
+ for (const auto &InstrOrder : Orders) {
+ unsigned Order = InstrOrder.first;
+ MachineInstr *MI = InstrOrder.second;
+ if (!MI)
+ continue;
+
+ // Insert all SDDbgLabel's whose order(s) are before "Order".
+ for (; DLI != DLE &&
+ (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order;
+ ++DLI) {
+ MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(Pos, DbgMI);
+ }
+ }
+ }
+ if (DLI == DLE)
+ break;
+
+ LastOrder = Order;
+ }
+ }
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
+/// Return the basic block label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+ return "sunit-dag." + BB->getFullName();
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 0000000000000..183ce4b0652d0
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,193 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/MachineValueType.h"
+#include <cassert>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class AAResults;
+class InstrItineraryData;
+
+ /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+ ///
+ /// Edges between SUnits are initially based on edges in the SelectionDAG,
+ /// and additional edges can be added by the schedulers as heuristics.
+ /// SDNodes such as Constants, Registers, and a few others that are not
+ /// interesting to schedulers are not allocated SUnits.
+ ///
+ /// SDNodes with MVT::Glue operands are grouped along with the glued
+ /// nodes into a single SUnit so that they are scheduled together.
+ ///
+ /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+ /// edges. Physical register dependence information is not carried in
+ /// the DAG and must be handled explicitly by schedulers.
+ ///
+ class ScheduleDAGSDNodes : public ScheduleDAG {
+ public:
+ MachineBasicBlock *BB;
+ SelectionDAG *DAG; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
+ explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+ ~ScheduleDAGSDNodes() override = default;
+
+ /// Run - perform scheduling.
+ ///
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb);
+
+ /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+ ///
+ static bool isPassiveNode(SDNode *Node) {
+ if (isa<ConstantSDNode>(Node)) return true;
+ if (isa<ConstantFPSDNode>(Node)) return true;
+ if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<RegisterMaskSDNode>(Node)) return true;
+ if (isa<GlobalAddressSDNode>(Node)) return true;
+ if (isa<BasicBlockSDNode>(Node)) return true;
+ if (isa<FrameIndexSDNode>(Node)) return true;
+ if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<TargetIndexSDNode>(Node)) return true;
+ if (isa<JumpTableSDNode>(Node)) return true;
+ if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<MCSymbolSDNode>(Node)) return true;
+ if (isa<BlockAddressSDNode>(Node)) return true;
+ if (Node->getOpcode() == ISD::EntryToken ||
+ isa<MDNodeSDNode>(Node)) return true;
+ return false;
+ }
+
+ /// NewSUnit - Creates a new SUnit and return a ptr to it.
+ ///
+ SUnit *newSUnit(SDNode *N);
+
+ /// Clone - Creates a clone of the specified SUnit. It does not copy the
+ /// predecessors / successors info nor the temporary scheduling states.
+ ///
+ SUnit *Clone(SUnit *Old);
+
+ /// BuildSchedGraph - Build the SUnit graph from the selection DAG we are
+ /// given as input. This SUnit graph is similar to the SelectionDAG, but
+ /// excludes nodes that aren't interesting to scheduling, and represents
+ /// glued together nodes with a single SUnit.
+ void BuildSchedGraph(AAResults *AA);
+
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
+ /// computeLatency - Compute node latency.
+ ///
+ virtual void computeLatency(SUnit *SU);
+
+ virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const;
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+ /// consistent with the Sequence of scheduled instructions.
+ void VerifyScheduledSequence(bool isBottomUp);
+
+ /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+ /// according to the order specified in Sequence.
+ ///
+ virtual MachineBasicBlock*
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+ void dumpNode(const SUnit &SU) const override;
+ void dump() const override;
+ void dumpSchedule() const;
+
+ std::string getGraphNodeLabel(const SUnit *SU) const override;
+
+ std::string getDAGName() const override;
+
+ virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ MVT ValueType;
+
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != nullptr; }
+
+ MVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ const SDNode *GetNode() const {
+ return Node;
+ }
+
+ unsigned GetIdx() const {
+ return DefIdx-1;
+ }
+
+ void Advance();
+
+ private:
+ void InitNodeNumDefs();
+ };
+
+ protected:
+ /// ForceUnitLatencies - Return true if all scheduling edges should be given
+ /// a latency value of one. The default is to return false; schedulers may
+ /// override this as needed.
+ virtual bool forceUnitLatencies() const { return false; }
+
+ private:
+ /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+ /// combined SUnits.
+ void ClusterNeighboringLoads(SDNode *Node);
+ /// ClusterNodes - Cluster certain nodes which should be scheduled together.
+ ///
+ void ClusterNodes();
+
+ /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+ void BuildSchedUnits();
+ void AddSchedEdges();
+
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos);
+ };
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
new file mode 100644
index 0000000000000..e7bac73678a76
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -0,0 +1,276 @@
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ VLIWScheduler("vliw-td", "VLIW scheduler",
+ createVLIWDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports top-down scheduling.
+///
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AA - AAResults for making memory reference queries.
+ AAResults *AA;
+
+public:
+ ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+ const TargetSubtargetInfo &STI = mf.getSubtarget();
+ HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
+ }
+
+ ~ScheduleDAGVLIW() override {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule() override;
+
+private:
+ void releaseSucc(SUnit *SU, const SDep &D);
+ void releaseSuccessors(SUnit *SU);
+ void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void listScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGVLIW::Schedule() {
+ LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(AA);
+
+ AvailableQueue->initNodes(SUnits);
+
+ listScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ dumpNode(*SuccSU);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ assert(!D.isWeak() && "unexpected artificial DAG edge");
+
+ --SuccSU->NumPredsLeft;
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ PendingQueue.push_back(SuccSU);
+ }
+}
+
+void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ releaseSucc(SU, *I);
+ }
+}
+
+/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(dumpNode(*SU));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ releaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->scheduledNode(SU);
+}
+
+/// listScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGVLIW::listScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ releaseSuccessors(&EntrySU);
+
+ // Add all leaves to the AvailableQueue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While AvailableQueue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+ else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ // Reset DFA state.
+ AvailableQueue->scheduledNode(nullptr);
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = nullptr;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ scheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem, just advance
+ // the current cycle and try again.
+ LLVM_DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ LLVM_DEBUG(dbgs() << "*** Emitting noop\n");
+ HazardRec->EmitNoop();
+ Sequence.push_back(nullptr); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createVLIWDAGScheduler - This creates a top-down list scheduler.
+ScheduleDAGSDNodes *
+llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 0000000000000..52a71b91d93f6
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,9631 @@
+//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <limits>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+// Default null implementations of the callbacks.
+void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}
+
+void SelectionDAG::DAGNodeDeletedListener::anchor() {}
+
+#define DEBUG_TYPE "selectiondag"
+
+static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
+ cl::Hidden, cl::init(true),
+ cl::desc("Gang up loads and stores generated by inlining of memcpy"));
+
+static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
+ cl::desc("Number limit for gluing ld/st of memcpy."),
+ cl::Hidden, cl::init(0));
+
+static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
+ LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+ return getValueAPF().bitwiseIsEqual(V);
+}
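+
+// For illustration, given some ConstantFPSDNode *CFP holding +0.0:
+//
+//   CFP->isExactlyValue(APFloat(0.0));   // true
+//   CFP->isExactlyValue(APFloat(-0.0));  // false, unlike a double compare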
+
+bool ConstantFPSDNode::isValueValidForType(EVT VT,
+ const APFloat& Val) {
+ assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ return !losesInfo;
+}
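+
+// E.g. a value representable exactly in the target semantics is valid:
+//
+//   ConstantFPSDNode::isValueValidForType(MVT::f32, APFloat(0.5)); // true
+//   ConstantFPSDNode::isValueValidForType(MVT::f32, APFloat(0.1)); // false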
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+ auto *BV = dyn_cast<BuildVectorSDNode>(N);
+ if (!BV)
+ return false;
+
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasUndefs;
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
+ EltSize) &&
+ EltSize == SplatBitSize;
+}
+
+// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
+// specializations of the more general isConstantSplatVector()?
+
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ // Look through a bit convert.
+ while (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).isUndef())
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target and
+ // a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all ones, not whether the individual
+ // constants are.
+ SDValue NotZero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
+ if (CN->getAPIntValue().countTrailingOnes() < EltSize)
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs. Even with the above element type twiddling, this should be OK, as
+ // the same type legalization should have applied to all the elements.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef())
+ return false;
+ return true;
+}
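+
+// For illustration: after element promotion a v8i8 all-ones splat may be a
+// BUILD_VECTOR whose operands are i32 constants of value 0xFF; it is still
+// accepted here because only the low EltSize (8) bits must be ones.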
+
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ // Look through a bit convert.
+ while (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ bool IsAllUndef = true;
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ IsAllUndef = false;
+ // Do not accept build_vectors that aren't all constants or which have non-0
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target
+ // and a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all zeros, not whether the individual
+ // constants are.
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
+ if (CN->getAPIntValue().countTrailingZeros() < EltSize)
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize)
+ return false;
+ } else
+ return false;
+ }
+
+ // Do not accept an all-undef vector.
+ if (IsAllUndef)
+ return false;
+ return true;
+}
+
+bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ if (!isa<ConstantSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
+bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ if (!isa<ConstantFPSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
+bool ISD::allOperandsUndef(const SDNode *N) {
+ // Return false if the node has no operands.
+ // This is "logically inconsistent" with the definition of "all" but
+ // is probably the desired behavior.
+ if (N->getNumOperands() == 0)
+ return false;
+ return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });
+}
+
+bool ISD::matchUnaryPredicate(SDValue Op,
+ std::function<bool(ConstantSDNode *)> Match,
+ bool AllowUndefs) {
+ // FIXME: Add support for scalar UNDEF cases?
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
+ return Match(Cst);
+
+ // FIXME: Add support for vector UNDEF cases?
+ if (ISD::BUILD_VECTOR != Op.getOpcode())
+ return false;
+
+ EVT SVT = Op.getValueType().getScalarType();
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ if (AllowUndefs && Op.getOperand(i).isUndef()) {
+ if (!Match(nullptr))
+ return false;
+ continue;
+ }
+
+ auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+ if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
+ return false;
+ }
+ return true;
+}
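+
+// Typical use, e.g. testing that every constant element of a scalar or
+// BUILD_VECTOR operand is a power of two (a sketch; Op stands in for some
+// SDValue):
+//
+//   bool AllPow2 = ISD::matchUnaryPredicate(
+//       Op, [](ConstantSDNode *C) { return C->getAPIntValue().isPowerOf2(); },
+//       /*AllowUndefs=*/false);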
+
+bool ISD::matchBinaryPredicate(
+ SDValue LHS, SDValue RHS,
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
+ bool AllowUndefs, bool AllowTypeMismatch) {
+ if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType())
+ return false;
+
+ // TODO: Add support for scalar UNDEF cases?
+ if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
+ if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
+ return Match(LHSCst, RHSCst);
+
+ // TODO: Add support for vector UNDEF cases?
+ if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
+ ISD::BUILD_VECTOR != RHS.getOpcode())
+ return false;
+
+ EVT SVT = LHS.getValueType().getScalarType();
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ bool LHSUndef = AllowUndefs && LHSOp.isUndef();
+ bool RHSUndef = AllowUndefs && RHSOp.isUndef();
+ auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp);
+ auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);
+ if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))
+ return false;
+ if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT ||
+ LHSOp.getValueType() != RHSOp.getValueType()))
+ return false;
+ if (!Match(LHSCst, RHSCst))
+ return false;
+ }
+ return true;
+}
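+
+// Typical use, e.g. checking that matching shift amounts in two operands sum
+// to the element width (a sketch; N0 and N1 stand in for SDValues):
+//
+//   bool SumTo32 = ISD::matchBinaryPredicate(
+//       N0, N1, [](ConstantSDNode *L, ConstantSDNode *R) {
+//         return (L->getAPIntValue() + R->getAPIntValue()) == 32;
+//       });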
+
+ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
+ case ISD::SEXTLOAD:
+ return ISD::SIGN_EXTEND;
+ case ISD::ZEXTLOAD:
+ return ISD::ZERO_EXTEND;
+ default:
+ break;
+ }
+
+ llvm_unreachable("Invalid LoadExtType");
+}
+
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
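+
+// E.g. swapping the operands of (setcc x, y, SETLT) requires SETGT: SETLT has
+// only the L bit set, and exchanging L and G yields SETGT. Likewise for the
+// unsigned forms:
+//
+//   getSetCCSwappedOperands(ISD::SETLT)  == ISD::SETGT
+//   getSetCCSwappedOperands(ISD::SETULE) == ISD::SETUGE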
+
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+ unsigned Operation = Op;
+ if (isInteger)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+
+ return ISD::CondCode(Operation);
+}
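+
+// E.g. inverting an integer SETLT flips L, G and E to give SETGE, while the
+// FP inverse must also flip orderedness:
+//
+//   getSetCCInverse(ISD::SETLT, /*isInteger=*/true)   == ISD::SETGE
+//   getSetCCInverse(ISD::SETOLT, /*isInteger=*/false) == ISD::SETUGE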
+
+/// For an integer comparison, return 1 if the comparison is a signed operation
+/// and 2 if it is an unsigned comparison. Return zero if the operation does
+/// not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool IsInteger) {
+ if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set, then the resultant comparison DOES suddenly
+ // care about orderedness, and it is true when ordered.
+ if (Op > ISD::SETTRUE2)
+ Op &= ~16; // Clear the U bit if the N bit is set.
+
+ // Canonicalize illegal integer setcc's.
+ if (IsInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
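+
+// E.g. OR-ing the condition bits of two integer setcc's:
+//
+//   getSetCCOrOperation(ISD::SETLT, ISD::SETGT, /*IsInteger=*/true) == SETNE
+//   getSetCCOrOperation(ISD::SETLT, ISD::SETEQ, /*IsInteger=*/true) == SETLE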
+
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool IsInteger) {
+ if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (IsInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
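+
+// E.g. AND-ing the condition bits of two integer setcc's:
+//
+//   getSetCCAndOperation(ISD::SETLE, ISD::SETGE, /*IsInteger=*/true) == SETEQ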
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ ArrayRef<SDValue> Ops) {
+ for (auto& Op : Ops) {
+ ID.AddPointer(Op.getNode());
+ ID.AddInteger(Op.getResNo());
+ }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ ArrayRef<SDUse> Ops) {
+ for (auto& Op : Ops) {
+ ID.AddPointer(Op.getNode());
+ ID.AddInteger(Op.getResNo());
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
+ SDVTList VTList, ArrayRef<SDValue> OpList) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList);
+}
+
+/// If this is an SDNode with special info, add this info to the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::TargetExternalSymbol:
+ case ISD::ExternalSymbol:
+ case ISD::MCSymbol:
+ llvm_unreachable("Should only be used on nodes with operands");
+ default: break; // Normal nodes don't need extra info.
+ case ISD::TargetConstant:
+ case ISD::Constant: {
+ const ConstantSDNode *C = cast<ConstantSDNode>(N);
+ ID.AddPointer(C->getConstantIntValue());
+ ID.AddBoolean(C->isOpaque());
+ break;
+ }
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP:
+ ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+ break;
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ ID.AddInteger(GA->getTargetFlags());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+ case ISD::RegisterMask:
+ ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
+ break;
+ case ISD::SRCVALUE:
+ ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+ break;
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ if (cast<LifetimeSDNode>(N)->hasOffset()) {
+ ID.AddInteger(cast<LifetimeSDNode>(N)->getSize());
+ ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset());
+ }
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ ID.AddInteger(CP->getTargetFlags());
+ break;
+ }
+ case ISD::TargetIndex: {
+ const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
+ ID.AddInteger(TI->getIndex());
+ ID.AddInteger(TI->getOffset());
+ ID.AddInteger(TI->getTargetFlags());
+ break;
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::STORE: {
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MLOAD: {
+ const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
+ ID.AddInteger(MLD->getMemoryVT().getRawBits());
+ ID.AddInteger(MLD->getRawSubclassData());
+ ID.AddInteger(MLD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MSTORE: {
+ const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ ID.AddInteger(MST->getMemoryVT().getRawBits());
+ ID.AddInteger(MST->getRawSubclassData());
+ ID.AddInteger(MST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MGATHER: {
+ const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N);
+ ID.AddInteger(MG->getMemoryVT().getRawBits());
+ ID.AddInteger(MG->getRawSubclassData());
+ ID.AddInteger(MG->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MSCATTER: {
+ const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N);
+ ID.AddInteger(MS->getMemoryVT().getRawBits());
+ ID.AddInteger(MS->getRawSubclassData());
+ ID.AddInteger(MS->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: {
+ const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+ ID.AddInteger(AT->getMemoryVT().getRawBits());
+ ID.AddInteger(AT->getRawSubclassData());
+ ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::PREFETCH: {
+ const MemSDNode *PF = cast<MemSDNode>(N);
+ ID.AddInteger(PF->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ i != e; ++i)
+ ID.AddInteger(SVN->getMaskElt(i));
+ break;
+ }
+ case ISD::TargetBlockAddress:
+ case ISD::BlockAddress: {
+ const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
+ ID.AddPointer(BA->getBlockAddress());
+ ID.AddInteger(BA->getOffset());
+ ID.AddInteger(BA->getTargetFlags());
+ break;
+ }
+ } // end switch (N->getOpcode())
+
+ // Target specific memory nodes could also have address spaces to check.
+ if (N->isTargetMemoryOpcode())
+ ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->ops());
+
+  // Handle SDNode leaves with special info.
+ AddNodeIDCustom(ID, N);
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+ if (N->getValueType(0) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::HANDLENODE:
+ case ISD::EH_LABEL:
+ return true; // Never CSE these nodes.
+ }
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (SDNode &Node : allnodes())
+ if (Node.use_empty())
+ DeadNodes.push_back(&Node);
+
+ RemoveDeadNodes(DeadNodes);
+
+  // If the root changed (e.g. it was a dead load), update the root.
+ setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.pop_back_val();
+    // Skip to the next node if we've already managed to delete this one. This
+    // can happen if replacing a node causes a node previously added to the
+    // worklist to be deleted.
+ if (N->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, nullptr);
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Operand = Use.getNode();
+ Use.set(SDValue());
+
+ // Now that we removed this operand, see if there are no uses of it left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+
+ DeallocateNode(N);
+ }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N) {
+ SmallVector<SDNode*, 16> DeadNodes(1, N);
+
+ // Create a dummy node that adds a reference to the root node, preventing
+ // it from being deleted. (This matters if the root is an operand of the
+ // dead node.)
+ HandleSDNode Dummy(getRoot());
+
+ RemoveDeadNodes(DeadNodes);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+  // Finally, drop the uses this node holds on its operands, remove it from
+  // the AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+ assert(N->getIterator() != AllNodes.begin() &&
+ "Cannot delete the entry node!");
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // Drop all of the operands and decrement used node's use counts.
+ N->DropOperands();
+
+ DeallocateNode(N);
+}
+
+void SDDbgInfo::erase(const SDNode *Node) {
+ DbgValMapType::iterator I = DbgValMap.find(Node);
+ if (I == DbgValMap.end())
+ return;
+ for (auto &Val: I->second)
+ Val->setIsInvalidated();
+ DbgValMap.erase(I);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+ // If we have operands, deallocate them.
+ removeOperands(N);
+
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+
+ // Set the opcode to DELETED_NODE to help catch bugs when node
+ // memory is reallocated.
+ // FIXME: There are places in SDag that have grown a dependency on the opcode
+ // value in the released node.
+ __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType));
+ N->NodeType = ISD::DELETED_NODE;
+
+ // If any of the SDDbgValue nodes refer to this SDNode, invalidate
+ // them and forget about that node.
+ DbgInfo->erase(N);
+}
+
+#ifndef NDEBUG
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ EVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ assert(I->getValueType() == N->getOperand(0).getValueType() &&
+ "Operands must all have the same type");
+ }
+ break;
+ }
+ }
+}
+#endif // NDEBUG
+
+/// Insert a newly allocated node into the DAG.
+///
+/// Handles insertion into the all nodes list and CSE map, as well as
+/// verification and other common operations when a new node is allocated.
+void SelectionDAG::InsertNode(SDNode *N) {
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ N->PersistentId = NextPersistentId++;
+ VerifySDNode(N);
+#endif
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeInserted(N);
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE maps that
+/// correspond to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::HANDLENODE: return false; // noop.
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol: {
+ ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+ Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>(
+ ESN->getSymbol(), ESN->getTargetFlags()));
+ break;
+ }
+ case ISD::MCSymbol: {
+ auto *MCSN = cast<MCSymbolSDNode>(N);
+ Erased = MCSymbols.erase(MCSN->getMCSymbol());
+ break;
+ }
+ case ISD::VALUETYPE: {
+ EVT VT = cast<VTSDNode>(N)->getVT();
+ if (VT.isExtended()) {
+ Erased = ExtendedValueTypeNodes.erase(VT);
+ } else {
+ Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr;
+ ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr;
+ }
+ break;
+ }
+ default:
+ // Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
+ !N->isMachineOpcode() && !doNotCSE(N)) {
+ N->dump(this);
+ dbgs() << "\n";
+ llvm_unreachable("Node is not in map!");
+ }
+#endif
+ return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+void SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
+ // For node types that aren't CSE'd, just act as if no identical node
+ // already exists.
+ if (!doNotCSE(N)) {
+ SDNode *Existing = CSEMap.GetOrInsertNode(N);
+ if (Existing != N) {
+ // If there was already an existing matching node, use ReplaceAllUsesWith
+ // to replace the dead one with the existing one. This can cause
+ // recursive merging of other unrelated nodes down the line.
+ ReplaceAllUsesWith(N, Existing);
+
+ // N is now dead. Inform the listeners and delete it.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, Existing);
+ DeleteNodeNotInCSEMaps(N);
+ return;
+ }
+ }
+
+ // If the node doesn't already exist, we updated it. Inform listeners.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return nullptr;
+
+ SDValue Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
+ if (Node)
+ Node->intersectFlagsWith(N->getFlags());
+ return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDValue Op1, SDValue Op2,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return nullptr;
+
+ SDValue Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
+ if (Node)
+ Node->intersectFlagsWith(N->getFlags());
+ return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return nullptr;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
+ if (Node)
+ Node->intersectFlagsWith(N->getFlags());
+ return Node;
+}
+
+unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+ Type *Ty = VT == MVT::iPTR ?
+ PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+ VT.getTypeForEVT(*getContext());
+
+ return getDataLayout().getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
+ : TM(tm), OptLevel(OL),
+ EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
+ Root(getEntryNode()) {
+ InsertNode(&EntryNode);
+ DbgInfo = new SDDbgInfo();
+}
+
+void SelectionDAG::init(MachineFunction &NewMF,
+ OptimizationRemarkEmitter &NewORE,
+ Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
+ LegacyDivergenceAnalysis * Divergence) {
+ MF = &NewMF;
+ SDAGISelPass = PassPtr;
+ ORE = &NewORE;
+ TLI = getSubtarget().getTargetLowering();
+ TSI = getSubtarget().getSelectionDAGInfo();
+ LibInfo = LibraryInfo;
+ Context = &MF->getFunction().getContext();
+ DA = Divergence;
+}
+
+SelectionDAG::~SelectionDAG() {
+ assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
+ allnodes_clear();
+ OperandRecycler.clear(OperandAllocator);
+ delete DbgInfo;
+}
+
+void SelectionDAG::allnodes_clear() {
+ assert(&*AllNodes.begin() == &EntryNode);
+ AllNodes.remove(AllNodes.begin());
+ while (!AllNodes.empty())
+ DeallocateNode(&AllNodes.front());
+#ifndef NDEBUG
+ NextPersistentId = 0;
+#endif
+}
+
+SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ void *&InsertPos) {
+ SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant:
+ case ISD::ConstantFP:
+ llvm_unreachable("Querying for Constant and ConstantFP nodes requires "
+ "debug location. Use another overload.");
+ }
+ }
+ return N;
+}
+
+SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ const SDLoc &DL, void *&InsertPos) {
+ SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (N) {
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ case ISD::ConstantFP:
+      // Erase the debug location from the node if the node is used at several
+      // different places. Do not propagate one location to all uses, as that
+      // would degrade the single-stepping debugging experience.
+ if (N->getDebugLoc() != DL.getDebugLoc())
+ N->setDebugLoc(DebugLoc());
+ break;
+ default:
+ // When the node's point of use is located earlier in the instruction
+ // sequence than its prior point of use, update its debug info to the
+ // earlier location.
+ if (DL.getIROrder() && DL.getIROrder() < N->getIROrder())
+ N->setDebugLoc(DL.getDebugLoc());
+ break;
+ }
+ }
+ return N;
+}
+
+void SelectionDAG::clear() {
+ allnodes_clear();
+ OperandRecycler.clear(OperandAllocator);
+ OperandAllocator.Reset();
+ CSEMap.clear();
+
+ ExtendedValueTypeNodes.clear();
+ ExternalSymbols.clear();
+ TargetExternalSymbols.clear();
+ MCSymbols.clear();
+ SDCallSiteDbgInfo.clear();
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+ static_cast<CondCodeSDNode*>(nullptr));
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+ static_cast<SDNode*>(nullptr));
+
+ EntryNode.UseList = nullptr;
+ InsertNode(&EntryNode);
+ Root = getEntryNode();
+ DbgInfo->clear();
+}
+
+SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType())
+ ? getNode(ISD::FP_EXTEND, DL, VT, Op)
+ : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
+}
+
+SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ANY_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
+ EVT OpVT) {
+ if (VT.bitsLE(Op.getValueType()))
+ return getNode(ISD::TRUNCATE, SL, VT, Op);
+
+ TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT);
+ return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
+ assert(!VT.isVector() &&
+ "getZeroExtendInReg should use the vector element type instead of "
+ "the vector type!");
+ if (Op.getValueType().getScalarType() == VT) return Op;
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
+ APInt Imm = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ return getNode(ISD::AND, DL, Op.getValueType(), Op,
+ getConstant(Imm, DL, Op.getValueType()));
+}
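+
+// E.g. keeping only the low 8 bits of an i32 value (DAG, Val and DL stand in
+// for a SelectionDAG, an i32 SDValue and an SDLoc):
+//
+//   SDValue Lo8 = DAG.getZeroExtendInReg(Val, DL, MVT::i8);
+//   // => (and Val, (i32 255))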
+
+SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ // Only unsigned pointer semantics are supported right now. In the future this
+ // might delegate to TLI to check pointer signedness.
+ return getZExtOrTrunc(Op, DL, VT);
+}
+
+SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
+ // Only unsigned pointer semantics are supported right now. In the future this
+ // might delegate to TLI to check pointer signedness.
+ return getZeroExtendInReg(Op, DL, VT);
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ SDValue NegOne =
+ getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);
+ return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
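+
+// E.g. DAG.getNOT(DL, X, MVT::i32) produces (xor X, (i32 -1)); for a vector
+// VT the all-ones constant is splatted across the elements.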
+
+SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
+ SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
+ return getNode(ISD::XOR, DL, VT, Val, TrueValue);
+}
+
+SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
+ EVT OpVT) {
+ if (!V)
+ return getConstant(0, DL, VT);
+
+ switch (TLI->getBooleanContents(OpVT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ return getConstant(1, DL, VT);
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return getAllOnesConstant(DL, VT);
+ }
+ llvm_unreachable("Unexpected boolean content enum!");
+}
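+
+// E.g. on a target whose getBooleanContents(OpVT) is
+// ZeroOrNegativeOneBooleanContent, getBoolConstant(true, DL, MVT::v4i32,
+// MVT::v4i32) yields a splat of (i32 -1) rather than 1.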
+
+SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
+ bool isT, bool isO) {
+ EVT EltVT = VT.getScalarType();
+ assert((EltVT.getSizeInBits() >= 64 ||
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ "getConstant with a uint64_t value that doesn't fit in the type!");
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
+ bool isT, bool isO) {
+ return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
+ EVT VT, bool isT, bool isO) {
+ assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+ EVT EltVT = VT.getScalarType();
+ const ConstantInt *Elt = &Val;
+
+ // In some cases the vector type is legal but the element type is illegal and
+ // needs to be promoted, for example v8i8 on ARM. In this case, promote the
+ // inserted value (the type does not need to match the vector element type).
+ // Any extra bits introduced will be truncated away.
+ if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypePromoteInteger) {
+ EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
+ Elt = ConstantInt::get(*getContext(), NewVal);
+ }
+ // In other cases the element type is illegal and needs to be expanded, for
+ // example v2i64 on MIPS32. In this case, find the nearest legal type, split
+ // the value into n parts and use a vector type with n-times the elements.
+ // Then bitcast to the type requested.
+ // Legalizing constants too early makes the DAGCombiner's job harder so we
+ // only legalize if the DAG tells us we must produce legal types.
+ else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
+ TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypeExpandInteger) {
+ const APInt &NewVal = Elt->getValue();
+ EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
+ unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
+ EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
+
+ // Check the temporary vector is the correct size. If this fails then
+ // getTypeToTransformTo() probably returned a type whose size (in bits)
+ // isn't a power-of-2 factor of the requested type size.
+ assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SmallVector<SDValue, 2> EltParts;
+ for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
+ EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
+ .zextOrTrunc(ViaEltSizeInBits), DL,
+ ViaEltVT, isT, isO));
+ }
+
+    // EltParts is currently in little-endian order. If we actually want
+    // big-endian order then reverse it now.
+ if (getDataLayout().isBigEndian())
+ std::reverse(EltParts.begin(), EltParts.end());
+
+ // The elements must be reversed when the element order is different
+ // to the endianness of the elements (because the BITCAST is itself a
+ // vector shuffle in this situation). However, we do not need any code to
+ // perform this reversal because getConstant() is producing a vector
+ // splat.
+ // This situation occurs in MIPS MSA.
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+ Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
+
+ SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+ return V;
+ }
+
+ assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
+ ID.AddPointer(Elt);
+ ID.AddBoolean(isO);
+ void *IP = nullptr;
+ SDNode *N = nullptr;
+ if ((N = FindNodeOrInsertPos(ID, DL, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector())
+ Result = getSplatBuildVector(VT, DL, Result);
+
+ return Result;
+}
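+
+// For illustration of the expansion path above: a v2i64 splat of the value
+// 0x0000000100000002 on a 32-bit little-endian target that must expand i64
+// becomes
+//
+//   (v2i64 (bitcast (v4i32 BUILD_VECTOR 2, 1, 2, 1)))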
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
+ bool isTarget) {
+ return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
+}
+
+SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT,
+ const SDLoc &DL, bool LegalTypes) {
+ EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes);
+ return getConstant(Val, DL, ShiftVT);
+}
+
+SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
+ bool isTarget) {
+ return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
+ EVT VT, bool isTarget) {
+ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+ EVT EltVT = VT.getScalarType();
+
+ // Do the map lookup using the actual bit pattern for the floating point
+ // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+ // we don't have issues with SNANs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
+ ID.AddPointer(&V);
+ void *IP = nullptr;
+ SDNode *N = nullptr;
+ if ((N = FindNodeOrInsertPos(ID, DL, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector())
+ Result = getSplatBuildVector(VT, DL, Result);
+ NewSDValueDbgMsg(Result, "Creating fp constant: ", this);
+ return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
+ bool isTarget) {
+ EVT EltVT = VT.getScalarType();
+ if (EltVT == MVT::f32)
+ return getConstantFP(APFloat((float)Val), DL, VT, isTarget);
+ else if (EltVT == MVT::f64)
+ return getConstantFP(APFloat(Val), DL, VT, isTarget);
+ else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
+ EltVT == MVT::f16) {
+ bool Ignored;
+ APFloat APF = APFloat(Val);
+ APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ &Ignored);
+ return getConstantFP(APF, DL, VT, isTarget);
+ } else
+ llvm_unreachable("Unsupported type in getConstantFP");
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
+ EVT VT, int64_t Offset, bool isTargetGA,
+ unsigned TargetFlags) {
+ assert((TargetFlags == 0 || isTargetGA) &&
+ "Cannot set target flags on target-independent globals");
+
+ // Truncate (with sign-extension) the offset value to the pointer size.
+ unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
+ if (BitWidth < 64)
+ Offset = SignExtend64(Offset, BitWidth);
+
+ unsigned Opc;
+ if (GV->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<GlobalAddressSDNode>(
+ Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddInteger(FI);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
+ unsigned TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent jump tables");
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddInteger(JTI);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = MF->getFunction().hasOptSize()
+ ? getDataLayout().getABITypeAlignment(C->getType())
+ : getDataLayout().getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ C->addSelectionDAGCSEId(ID);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
+ unsigned TargetFlags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
+ ID.AddInteger(Index);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None);
+ ID.AddPointer(MBB);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<BasicBlockSDNode>(MBB);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(EVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
+ ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
+
+ SDNode *&N = VT.isExtended() ?
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+ if (N) return SDValue(N, 0);
+ N = newSDNode<VTSDNode>(VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
+ SDNode *&N = MCSymbols[Sym];
+ if (N)
+ return SDValue(N, 0);
+ N = newSDNode<MCSymbolSDNode>(Sym, VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+ unsigned TargetFlags) {
+ SDNode *&N =
+ TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)];
+ if (N) return SDValue(N, 0);
+ N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (!CondCodeNodes[Cond]) {
+ auto *N = newSDNode<CondCodeSDNode>(Cond);
+ CondCodeNodes[Cond] = N;
+ InsertNode(N);
+ }
+
+ return SDValue(CondCodeNodes[Cond], 0);
+}
+
+/// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that
+/// point at N1 to point at N2 and indices that point at N2 to point at N1.
+static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
+ std::swap(N1, N2);
+ ShuffleVectorSDNode::commuteMask(M);
+}
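+
+// E.g. with 4-element vectors, mask <0, 5, 2, 7> over <N1, N2> becomes mask
+// <4, 1, 6, 3> over <N2, N1>; both select N1[0], N2[1], N1[2], N2[3].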
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
+ SDValue N2, ArrayRef<int> Mask) {
+ assert(VT.getVectorNumElements() == Mask.size() &&
+ "Must have the same number of vector elements as mask elements!");
+ assert(VT == N1.getValueType() && VT == N2.getValueType() &&
+ "Invalid VECTOR_SHUFFLE");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+
+ // Validate that all indices in Mask are within the range of the elements
+ // input to the shuffle.
+ int NElts = Mask.size();
+ assert(llvm::all_of(Mask,
+ [&](int M) { return M < (NElts * 2) && M >= -1; }) &&
+ "Index out of range");
+
+ // Copy the mask so we can do any needed cleanup.
+ SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end());
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N1 == N2) {
+ N2 = getUNDEF(VT);
+ for (int i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= NElts) MaskVec[i] -= NElts;
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N1.isUndef())
+ commuteShuffle(N1, N2, MaskVec);
+
+ if (TLI->hasVectorBlend()) {
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
+
+ for (int i = 0; i < NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
+ continue;
+
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
+
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+ }
+
+  // Canonicalize all indices into lhs -> shuffle lhs, undef
+  // Canonicalize all indices into rhs -> shuffle rhs, undef
+ bool AllLHS = true, AllRHS = true;
+ bool N2Undef = N2.isUndef();
+ for (int i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= NElts) {
+ if (N2Undef)
+ MaskVec[i] = -1;
+ else
+ AllLHS = false;
+ } else if (MaskVec[i] >= 0) {
+ AllRHS = false;
+ }
+ }
+ if (AllLHS && AllRHS)
+ return getUNDEF(VT);
+ if (AllLHS && !N2Undef)
+ N2 = getUNDEF(VT);
+ if (AllRHS) {
+ N1 = getUNDEF(VT);
+ commuteShuffle(N1, N2, MaskVec);
+ }
+ // Reset our undef status after accounting for the mask.
+ N2Undef = N2.isUndef();
+ // Re-check whether both sides ended up undef.
+ if (N1.isUndef() && N2Undef)
+ return getUNDEF(VT);
+
+ // If Identity shuffle return that node.
+ bool Identity = true, AllSame = true;
+ for (int i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false;
+ if (MaskVec[i] != MaskVec[0]) AllSame = false;
+ }
+ if (Identity && NElts)
+ return N1;
+
+ // Shuffling a constant splat doesn't change the result.
+ if (N2Undef) {
+ SDValue V = N1;
+
+    // Look through any bitcasts. We check that these don't change the number
+    // (and size) of elements and just change their types.
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V->getOperand(0);
+
+ // A splat should always show up as a build vector node.
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ // If this is a splat of an undef, shuffling it is also undef.
+ if (Splat && Splat.isUndef())
+ return getUNDEF(VT);
+
+ bool SameNumElts =
+ V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
+
+ // We only have a splat which can skip shuffles if there is a splatted
+ // value and no undef lanes rearranged by the shuffle.
+ if (Splat && UndefElements.none()) {
+        // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
+        // number of elements matches or the splatted value is a zero constant.
+ if (SameNumElts)
+ return N1;
+ if (auto *C = dyn_cast<ConstantSDNode>(Splat))
+ if (C->isNullValue())
+ return N1;
+ }
+
+ // If the shuffle itself creates a splat, build the vector directly.
+ if (AllSame && SameNumElts) {
+ EVT BuildVT = BV->getValueType(0);
+ const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+ SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (BuildVT != VT)
+ NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+ return NewBV;
+ }
+ }
+ }
+
+ FoldingSetNodeID ID;
+ SDValue Ops[2] = { N1, N2 };
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops);
+ for (int i = 0; i != NElts; ++i)
+ ID.AddInteger(MaskVec[i]);
+
+ void* IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ // Allocate the mask array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+ // the node is deallocated, but recovered when the NodeAllocator is released.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+ llvm::copy(MaskVec, MaskAlloc);
+
+ auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(),
+ dl.getDebugLoc(), MaskAlloc);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
+ EVT VT = SV.getValueType(0);
+ SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
+ ShuffleVectorSDNode::commuteMask(MaskVec);
+
+ SDValue Op0 = SV.getOperand(0);
+ SDValue Op1 = SV.getOperand(1);
+ return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);
+ ID.AddInteger(RegNo);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
+ N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None);
+ ID.AddPointer(RegMask);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<RegisterMaskSDNode>(RegMask);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root,
+ MCSymbol *Label) {
+ return getLabelNode(ISD::EH_LABEL, dl, Root, Label);
+}
+
+SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
+ SDValue Root, MCSymbol *Label) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops);
+ ID.AddPointer(Label);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N =
+ newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
+ int64_t Offset, bool isTarget,
+ unsigned TargetFlags) {
+ unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddPointer(BA);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+ assert((!V || V->getType()->isPointerTy()) &&
+ "SrcValue is not a pointer?");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
+ ID.AddPointer(V);
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<SrcValueSDNode>(V);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMDNode(const MDNode *MD) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None);
+ ID.AddPointer(MD);
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<MDNodeSDNode>(MD);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) {
+ if (VT == V.getValueType())
+ return V;
+
+ return getNode(ISD::BITCAST, SDLoc(V), VT, V);
+}
+
+SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
+ unsigned SrcAS, unsigned DestAS) {
+ SDValue Ops[] = {Ptr};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops);
+ ID.AddInteger(SrcAS);
+ ID.AddInteger(DestAS);
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VT, SrcAS, DestAS);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+/// getShiftAmountOperand - Return the specified value cast to
+/// the target's desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
+ EVT OpTy = Op.getValueType();
+ EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout());
+ if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+ return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
+}
+
+SDValue SelectionDAG::expandVAArg(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+ const MaybeAlign MA(Node->getConstantOperandVal(3));
+
+ SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1,
+ Tmp2, MachinePointerInfo(V));
+ SDValue VAList = VAListLoad;
+
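+  // If this vaarg needs more alignment than the va_list guarantees, round
+  // the pointer up first: VAList = (VAList + Align - 1) & -Align.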
+ if (MA && *MA > TLI.getMinStackArgumentAlignment()) {
+ VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(MA->value() - 1, dl, VAList.getValueType()));
+
+ VAList =
+ getNode(ISD::AND, dl, VAList.getValueType(), VAList,
+ getConstant(-(int64_t)MA->value(), dl, VAList.getValueType()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(getDataLayout().getTypeAllocSize(
+ VT.getTypeForEVT(*getContext())),
+ dl, VAList.getValueType()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp1 =
+ getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V));
+ // Load the actual argument out of the pointer VAList
+ return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo());
+}
+
+SDValue SelectionDAG::expandVACopy(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ SDValue Tmp1 =
+ getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0),
+ Node->getOperand(2), MachinePointerInfo(VS));
+ return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD));
+}
+
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+ MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
+ unsigned ByteSize = VT.getStoreSize();
+ Type *Ty = VT.getTypeForEVT(*getContext());
+ unsigned StackAlign =
+ std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
+
+ int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
+ return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
+}
+
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
+ unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
+ Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ Type *Ty2 = VT2.getTypeForEVT(*getContext());
+ const DataLayout &DL = getDataLayout();
+ unsigned Align =
+ std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2));
+
+ MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
+ return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
+}
+
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
+ ISD::CondCode Cond, const SDLoc &dl) {
+ EVT OpVT = N1.getValueType();
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT);
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!OpVT.isInteger() && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (OpVT.isInteger()) {
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ // icmp eq/ne X, undef -> undef.
+ if ((N1.isUndef() || N2.isUndef()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE))
+ return getUNDEF(VT);
+
+ // If both operands are undef, we can return undef for int comparison.
+ // icmp undef, undef -> undef.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+
+ // icmp X, X -> true/false
+ // icmp X, undef -> true/false because undef could be X.
+ if (N1 == N2)
+ return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT);
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) {
+ const APInt &C2 = N2C->getAPIntValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT);
+ case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT);
+ case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT);
+ case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT);
+ case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT);
+ case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT);
+ case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT);
+ case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT);
+ case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT);
+ }
+ }
+ }
+
+ auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+
+ if (N1CFP && N2CFP) {
+ APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, dl, VT, OpVT);
+ case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ }
+ } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) {
+ // Ensure that the constant occurs on the RHS.
+ ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
+ if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT()))
+ return SDValue();
+ return getSetCC(dl, VT, N2, N1, SwappedCond);
+ } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) ||
+ (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) {
+ // If an operand is known to be a nan (or undef that could be a nan), we can
+ // fold it.
+    // Choosing NaN for the undef will always make unordered comparisons
+    // succeed and ordered comparisons fail.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default:
+ llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return getBoolConstant(false, dl, VT, OpVT);
+ case 1: // Known true.
+ return getBoolConstant(true, dl, VT, OpVT);
+ case 2: // Undefined.
+ return getUNDEF(VT);
+ }
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by DemandedBits are used.
+/// TODO: really we should be making this into the DAG equivalent of
+/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
+ EVT VT = V.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return GetDemandedBits(V, DemandedBits, DemandedElts);
+}
+
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by DemandedBits are used in the elements specified by
+/// DemandedElts.
+/// TODO: really we should be making this into the DAG equivalent of
+/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
+ const APInt &DemandedElts) {
+ switch (V.getOpcode()) {
+ default:
+ break;
+ case ISD::Constant: {
+ auto *CV = cast<ConstantSDNode>(V.getNode());
+ assert(CV && "Const value should be ConstSDNode.");
+ const APInt &CVal = CV->getAPIntValue();
+ APInt NewVal = CVal & DemandedBits;
+ if (NewVal != CVal)
+ return getConstant(NewVal, SDLoc(V), V.getValueType());
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SIGN_EXTEND_INREG:
+ return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
+ *this, 0);
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= DemandedBits.getBitWidth())
+ break;
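+      // Only the source bits that feed the demanded result bits matter, so
+      // shift the demanded mask up by the shift amount; e.g. demanding the
+      // low 8 bits of (srl X, 4) demands bits 4..11 of X.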
+ APInt SrcDemandedBits = DemandedBits << Amt;
+ if (SDValue SimplifyLHS =
+ GetDemandedBits(V.getOperand(0), SrcDemandedBits))
+ return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
+ V.getOperand(1));
+ }
+ break;
+ case ISD::AND: {
+ // X & -1 -> X (ignoring bits which aren't demanded).
+ // Also handle the case where masked out bits in X are known to be zero.
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) {
+ const APInt &AndVal = RHSC->getAPIntValue();
+ if (DemandedBits.isSubsetOf(AndVal) ||
+ DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero |
+ AndVal))
+ return V.getOperand(0);
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ SDValue Src = V.getOperand(0);
+ unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
+ // Being conservative here - only peek through if we only demand bits in the
+ // non-extended source (even though the extended bits are technically
+ // undef).
+ if (DemandedBits.getActiveBits() > SrcBitWidth)
+ break;
+ APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth);
+ if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits))
+ return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
+ break;
+ }
+ }
+ return SDValue();
+}
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
+ return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+ unsigned Depth) const {
+ EVT VT = V.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return MaskedValueIsZero(V, Mask, DemandedElts, Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in
+/// DemandedElts. We use this predicate to simplify operations downstream.
+/// Mask is known to be zero for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
+ return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
+}
+
+/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'.
+bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
+ unsigned Depth) const {
+ return Mask.isSubsetOf(computeKnownBits(V, Depth).One);
+}
+
+/// isSplatValue - Return true if the vector V has the same value
+/// across all DemandedElts.
+bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
+ APInt &UndefElts) {
+ if (!DemandedElts)
+ return false; // No demanded elts, better to assume we don't know anything.
+
+ EVT VT = V.getValueType();
+ assert(VT.isVector() && "Vector type expected");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
+ UndefElts = APInt::getNullValue(NumElts);
+
+ switch (V.getOpcode()) {
+ case ISD::BUILD_VECTOR: {
+ SDValue Scl;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = V.getOperand(i);
+ if (Op.isUndef()) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (!DemandedElts[i])
+ continue;
+ if (Scl && Scl != Op)
+ return false;
+ Scl = Op;
+ }
+ return true;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat.
+ // TODO: Do we need to handle shuffle(splat, undef, mask)?
+ int SplatIndex = -1;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (!DemandedElts[i])
+ continue;
+ if (0 <= SplatIndex && SplatIndex != M)
+ return false;
+ SplatIndex = M;
+ }
+ return true;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Src = V.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt UndefSrcElts;
+ APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) {
+ UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
+ return true;
+ }
+ }
+ break;
+ }
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND: {
+ APInt UndefLHS, UndefRHS;
+ SDValue LHS = V.getOperand(0);
+ SDValue RHS = V.getOperand(1);
+ if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
+ isSplatValue(RHS, DemandedElts, UndefRHS)) {
+ UndefElts = UndefLHS | UndefRHS;
+ return true;
+ }
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// Helper wrapper to the main isSplatValue function.
+bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
+ EVT VT = V.getValueType();
+ assert(VT.isVector() && "Vector type expected");
+ unsigned NumElts = VT.getVectorNumElements();
+
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return isSplatValue(V, DemandedElts, UndefElts) &&
+ (AllowUndefs || !UndefElts);
+}
+
+SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
+ V = peekThroughExtractSubvectors(V);
+
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+ switch (Opcode) {
+ default: {
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (isSplatValue(V, DemandedElts, UndefElts)) {
+ // Handle case where all demanded elements are UNDEF.
+ if (DemandedElts.isSubsetOf(UndefElts)) {
+ SplatIdx = 0;
+ return getUNDEF(VT);
+ }
+ SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+ return V;
+ }
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat.
+ // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+ // getTargetVShiftNode currently struggles without the splat source.
+ auto *SVN = cast<ShuffleVectorSDNode>(V);
+ if (!SVN->isSplat())
+ break;
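+    // The splat index addresses the concatenation of both operands; e.g. for
+    // 4-element inputs, an index of 5 names element 1 of the second operand.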
+ int Idx = SVN->getSplatIndex();
+ int NumElts = V.getValueType().getVectorNumElements();
+ SplatIdx = Idx % NumElts;
+ return V.getOperand(Idx / NumElts);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::getSplatValue(SDValue V) {
+ int SplatIdx;
+ if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
+ return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
+ SrcVector.getValueType().getScalarType(), SrcVector,
+ getIntPtrConstant(SplatIdx, SDLoc(V)));
+ return SDValue();
+}
+
+/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
+/// is less than the element bit-width of the shift node, return it.
+static const APInt *getValidShiftAmountConstant(SDValue V) {
+ unsigned BitWidth = V.getScalarValueSizeInBits();
+ if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) {
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.ult(BitWidth))
+ return &ShAmt;
+ }
+ return nullptr;
+}
+
+/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
+/// than the element bit-width of the shift node, return the minimum value.
+static const APInt *getValidMinimumShiftAmountConstant(SDValue V) {
+ unsigned BitWidth = V.getScalarValueSizeInBits();
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
+ if (!BV)
+ return nullptr;
+ const APInt *MinShAmt = nullptr;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!SA)
+ return nullptr;
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.uge(BitWidth))
+ return nullptr;
+ if (MinShAmt && MinShAmt->ule(ShAmt))
+ continue;
+ MinShAmt = &ShAmt;
+ }
+ return MinShAmt;
+}
+
+/// Determine which bits of Op are known to be either zero or one and return
+/// them in Known. For vectors, the known bits are those that are shared by
+/// every vector element.
+KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return computeKnownBits(Op, DemandedElts, Depth);
+}
+
+/// Determine which bits of Op are known to be either zero or one and return
+/// them in Known. The DemandedElts argument allows us to only collect the known
+/// bits that are shared by the requested vector elements.
+KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth) const {
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
+
+ KnownBits Known(BitWidth); // Don't know anything.
+
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ // We know all of the bits for a constant!
+ Known.One = C->getAPIntValue();
+ Known.Zero = ~Known.One;
+ return Known;
+ }
+ if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
+ // We know all of the bits for a constant fp!
+ Known.One = C->getValueAPF().bitcastToAPInt();
+ Known.Zero = ~Known.One;
+ return Known;
+ }
+
+ if (Depth >= MaxRecursionDepth)
+ return Known; // Limit search depth.
+
+ KnownBits Known2;
+ unsigned NumElts = DemandedElts.getBitWidth();
+ assert((!Op.getValueType().isVector() ||
+ NumElts == Op.getValueType().getVectorNumElements()) &&
+ "Unexpected vector size");
+
+ if (!DemandedElts)
+ return Known; // No demanded elts, better to assume we don't know anything.
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case ISD::BUILD_VECTOR:
+ // Collect the known bits that are shared by every demanded vector element.
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ SDValue SrcOp = Op.getOperand(i);
+ Known2 = computeKnownBits(SrcOp, Depth + 1);
+
+ // BUILD_VECTOR can implicitly truncate sources, we must handle this.
+ if (SrcOp.getValueSizeInBits() != BitWidth) {
+ assert(SrcOp.getValueSizeInBits() > BitWidth &&
+ "Expected BUILD_VECTOR implicit truncation");
+ Known2 = Known2.trunc(BitWidth);
+ }
+
+ // Known bits are the values that are shared by every demanded element.
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ }
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ // Collect the known bits that are shared by every vector element referenced
+ // by the shuffle.
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ int M = SVN->getMaskElt(i);
+ if (M < 0) {
+ // For UNDEF elements, we don't know anything about the common state of
+ // the shuffle result.
+ Known.resetAll();
+ DemandedLHS.clearAllBits();
+ DemandedRHS.clearAllBits();
+ break;
+ }
+
+ if ((unsigned)M < NumElts)
+ DemandedLHS.setBit((unsigned)M % NumElts);
+ else
+ DemandedRHS.setBit((unsigned)M % NumElts);
+ }
+ // Known bits are the values that are shared by every demanded element.
+ if (!!DemandedLHS) {
+ SDValue LHS = Op.getOperand(0);
+ Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ if (!!DemandedRHS) {
+ SDValue RHS = Op.getOperand(1);
+ Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ // Split DemandedElts and test each of the demanded subvectors.
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ EVT SubVectorVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
+ unsigned NumSubVectors = Op.getNumOperands();
+ for (unsigned i = 0; i != NumSubVectors; ++i) {
+ APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
+ DemandedSub = DemandedSub.trunc(NumSubVectorElts);
+ if (!!DemandedSub) {
+ SDValue Sub = Op.getOperand(i);
+ Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ }
+ break;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ // If we know the element index, demand any elements from the subvector and
+    // the remainder from the src it's inserted into; otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ if (!!DemandedSubElts) {
+ Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
+ if (Known.isUnknown())
+ break; // early-out.
+ }
+ APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
+ APInt DemandedSrcElts = DemandedElts & ~SubMask;
+ if (!!DemandedSrcElts) {
+ Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ } else {
+ Known = computeKnownBits(Sub, Depth + 1);
+ if (Known.isUnknown())
+ break; // early-out.
+ Known2 = computeKnownBits(Src, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+    // If we know the element index, just demand the source elements covered
+    // by the subvector; otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ }
+ Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR: {
+    // We know as much about a scalar_to_vector as we know about its source,
+    // which becomes the first element of an otherwise unknown vector.
+ if (DemandedElts != 1)
+ break;
+
+ SDValue N0 = Op.getOperand(0);
+ Known = computeKnownBits(N0, Depth + 1);
+ if (N0.getValueSizeInBits() != BitWidth)
+ Known = Known.trunc(BitWidth);
+
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue N0 = Op.getOperand(0);
+ EVT SubVT = N0.getValueType();
+ unsigned SubBitWidth = SubVT.getScalarSizeInBits();
+
+ // Ignore bitcasts from unsupported types.
+ if (!(SubVT.isInteger() || SubVT.isFloatingPoint()))
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ if (BitWidth == SubBitWidth) {
+ Known = computeKnownBits(N0, DemandedElts, Depth + 1);
+ break;
+ }
+
+ bool IsLE = getDataLayout().isLittleEndian();
+
+ // Bitcast 'small element' vector to 'large element' scalar/vector.
+ if ((BitWidth % SubBitWidth) == 0) {
+ assert(N0.getValueType().isVector() && "Expected bitcast from vector");
+
+ // Collect known bits for the (larger) output by collecting the known
+ // bits from each set of sub elements and shift these into place.
+ // We need to separately call computeKnownBits for each set of
+ // sub elements as the knownbits for each is likely to be different.
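+      // e.g. bitcasting v2i8 -> i16 on a little-endian target places element
+      // 0 in result bits [7:0] and element 1 in bits [15:8].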
+ unsigned SubScale = BitWidth / SubBitWidth;
+ APInt SubDemandedElts(NumElts * SubScale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SubDemandedElts.setBit(i * SubScale);
+
+ for (unsigned i = 0; i != SubScale; ++i) {
+ Known2 = computeKnownBits(N0, SubDemandedElts.shl(i),
+ Depth + 1);
+ unsigned Shifts = IsLE ? i : SubScale - 1 - i;
+ Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts);
+ Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts);
+ }
+ }
+
+ // Bitcast 'large element' scalar/vector to 'small element' vector.
+ if ((SubBitWidth % BitWidth) == 0) {
+ assert(Op.getValueType().isVector() && "Expected bitcast to vector");
+
+ // Collect known bits for the (smaller) output by collecting the known
+ // bits from the overlapping larger input elements and extracting the
+ // sub sections we actually care about.
+ unsigned SubScale = SubBitWidth / BitWidth;
+ APInt SubDemandedElts(NumElts / SubScale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SubDemandedElts.setBit(i / SubScale);
+
+ Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);
+
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Shifts = IsLE ? i : NumElts - 1 - i;
+ unsigned Offset = (Shifts % SubScale) * BitWidth;
+ Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
+ Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ }
+ }
+ break;
+ }
+ case ISD::AND:
+    // If either the LHS or the RHS is zero, the result is zero.
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ Known.One &= Known2.One;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ Known.Zero |= Known2.Zero;
+ break;
+ case ISD::OR:
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ Known.Zero &= Known2.Zero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ Known.One |= Known2.One;
+ break;
+ case ISD::XOR: {
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
+ Known.Zero = KnownZeroOut;
+ break;
+ }
+ case ISD::MUL: {
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ unsigned TrailZ = Known.countMinTrailingZeros() +
+ Known2.countMinTrailingZeros();
+ unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
+ Known2.countMinLeadingZeros(),
+ BitWidth) - BitWidth;
+
+ Known.resetAll();
+ Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
+ Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
+ break;
+ }
+ case ISD::UDIV: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ unsigned LeadZ = Known2.countMinLeadingZeros();
+
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
+ if (RHSMaxLeadingZeros != BitWidth)
+ LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
+
+ Known.Zero.setHighBits(LeadZ);
+ break;
+ }
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1);
+
+ // Only known if known in both the LHS and RHS.
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ break;
+ case ISD::SELECT_CC:
+ Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1);
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
+
+ // Only known if known in both the LHS and RHS.
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ break;
+ case ISD::SMULO:
+ case ISD::UMULO:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents.
+ // If we know the result of a setcc has the top bits zero, use this info.
+ // We know that we have an integer-based boolean since these operations
+    // are only available for integer types.
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ case ISD::SHL:
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ unsigned Shift = ShAmt->getZExtValue();
+ Known.Zero <<= Shift;
+ Known.One <<= Shift;
+ // Low bits are known zero.
+ Known.Zero.setLowBits(Shift);
+ }
+ break;
+ case ISD::SRL:
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ unsigned Shift = ShAmt->getZExtValue();
+ Known.Zero.lshrInPlace(Shift);
+ Known.One.lshrInPlace(Shift);
+ // High bits are known zero.
+ Known.Zero.setHighBits(Shift);
+ } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) {
+ // Minimum shift high bits are known zero.
+ Known.Zero.setHighBits(ShMinAmt->getZExtValue());
+ }
+ break;
+ case ISD::SRA:
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ unsigned Shift = ShAmt->getZExtValue();
+ // Sign extend known zero/one bit (else is unknown).
+ Known.Zero.ashrInPlace(Shift);
+ Known.One.ashrInPlace(Shift);
+ }
+ break;
+ case ISD::FSHL:
+ case ISD::FSHR:
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) {
+ unsigned Amt = C->getAPIntValue().urem(BitWidth);
+
+ // For fshl, 0-shift returns the 1st arg.
+ // For fshr, 0-shift returns the 2nd arg.
+ if (Amt == 0) {
+ Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1),
+ DemandedElts, Depth + 1);
+ break;
+ }
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
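+      // e.g. for i8, fshl(X, Y, 3) == (X << 3) | (Y >> 5): the top 5 result
+      // bits come from X's low bits and the low 3 bits from Y's high bits.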
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Opcode == ISD::FSHL) {
+ Known.One <<= Amt;
+ Known.Zero <<= Amt;
+ Known2.One.lshrInPlace(BitWidth - Amt);
+ Known2.Zero.lshrInPlace(BitWidth - Amt);
+ } else {
+ Known.One <<= BitWidth - Amt;
+ Known.Zero <<= BitWidth - Amt;
+ Known2.One.lshrInPlace(Amt);
+ Known2.Zero.lshrInPlace(Amt);
+ }
+ Known.One |= Known2.One;
+ Known.Zero |= Known2.Zero;
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned EBits = EVT.getScalarSizeInBits();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
+
+ APInt InSignMask = APInt::getSignMask(EBits);
+ APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InSignMask = InSignMask.zext(BitWidth);
+ if (NewBits.getBoolValue())
+ InputDemandedBits |= InSignMask;
+
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known.One &= InputDemandedBits;
+ Known.Zero &= InputDemandedBits;
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear
+ Known.Zero |= NewBits;
+ Known.One &= ~NewBits;
+ } else if (Known.One.intersects(InSignMask)) { // Input sign bit known set
+ Known.One |= NewBits;
+ Known.Zero &= ~NewBits;
+ } else { // Input sign bit unknown
+ Known.Zero &= ~NewBits;
+ Known.One &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // If we have a known 1, its position is our upper bound.
+ unsigned PossibleTZ = Known2.countMaxTrailingZeros();
+ unsigned LowBits = Log2_32(PossibleTZ) + 1;
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // If we have a known 1, its position is our upper bound.
+ unsigned PossibleLZ = Known2.countMaxLeadingZeros();
+ unsigned LowBits = Log2_32(PossibleLZ) + 1;
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
+ case ISD::CTPOP: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // If we know some of the bits are zero, they can't be one.
+ unsigned PossibleOnes = Known2.countMaxPopulation();
+ Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
+ break;
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ const Constant *Cst = TLI->getTargetConstantFromLoad(LD);
+ if (ISD::isNON_EXTLoad(LD) && Cst) {
+ // Determine any common known bits from the loaded constant pool value.
+ Type *CstTy = Cst->getType();
+ if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
+          // If it's a vector splat, then we can (quickly) reuse the scalar path.
+ // NOTE: We assume all elements match and none are UNDEF.
+ if (CstTy->isVectorTy()) {
+ if (const Constant *Splat = Cst->getSplatValue()) {
+ Cst = Splat;
+ CstTy = Cst->getType();
+ }
+ }
+ // TODO - do we need to handle different bitwidths?
+ if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) {
+ // Iterate across all vector elements finding common known bits.
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (Constant *Elt = Cst->getAggregateElement(i)) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+ const APInt &Value = CInt->getValue();
+ Known.One &= Value;
+ Known.Zero &= ~Value;
+ continue;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Known.One &= Value;
+ Known.Zero &= ~Value;
+ continue;
+ }
+ }
+ Known.One.clearAllBits();
+ Known.Zero.clearAllBits();
+ break;
+ }
+ } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
+ const APInt &Value = CInt->getValue();
+ Known.One = Value;
+ Known.Zero = ~Value;
+ } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Known.One = Value;
+ Known.Zero = ~Value;
+ }
+ }
+ }
+ } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ EVT VT = LD->getMemoryVT();
+ unsigned MemBits = VT.getScalarSizeInBits();
+ Known.Zero.setBitsFrom(MemBits);
+ } else if (const MDNode *Ranges = LD->getRanges()) {
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD)
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ break;
+ }
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+    // If the sign bit is known to be zero or one, then sext extends it into
+    // the top bits; otherwise the top bits are unknown.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    // If the sign bit is known to be zero or one, then sext extends it into
+    // the top bits; otherwise the top bits are unknown.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
+ Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = Known.trunc(BitWidth);
+ break;
+ }
+ case ISD::AssertZext: {
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
+ Known.Zero |= (~InMask);
+ Known.One &= (~Known.Zero);
+ break;
+ }
+ case ISD::FGETSIGN:
+ // All bits are zero except the low bit.
+ Known.Zero.setBitsFrom(1);
+ break;
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ if (Op.getResNo() == 1) {
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SUB:
+ case ISD::SUBC: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
+ Known, Known2);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::ADDCARRY:
+ if (Op.getResNo() == 1) {
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE: {
+ assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here.");
+
+ // With ADDE and ADDCARRY, a carry bit may be added in.
+ KnownBits Carry(1);
+ if (Opcode == ISD::ADDE)
+ // Can't track carry from glue, set carry to unknown.
+ Carry.resetAll();
+ else if (Opcode == ISD::ADDCARRY)
+ // TODO: Compute known bits for the carry operand. Not sure if it is worth
+ // the trouble (how often will we find a known carry bit). And I haven't
+ // tested this very much yet, but something like this might work:
+ // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ // Carry = Carry.zextOrTrunc(1, false);
+ Carry.resetAll();
+ else
+ Carry.setAllZero();
+
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
+ break;
+ }
+ case ISD::SREM:
+ if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue().abs();
+ if (RA.isPowerOf2()) {
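+        // A remainder by a power of two keeps the dividend's low bits and
+        // takes its sign; e.g. X srem 8 preserves X's low 3 bits.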
+ APInt LowBits = RA - 1;
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // The low bits of the first operand are unchanged by the srem.
+ Known.Zero = Known2.Zero & LowBits;
+ Known.One = Known2.One & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
+ Known.Zero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (Known2.isNegative() && LowBits.intersects(Known2.One))
+ Known.One |= ~LowBits;
+        assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ }
+ }
+ break;
+ case ISD::UREM: {
+ if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2()) {
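+        // An unsigned remainder by a power of two is just a mask;
+        // e.g. X urem 16 == X & 15.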
+ APInt LowBits = (RA - 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // The upper bits are all zero, the lower ones are unchanged.
+ Known.Zero = Known2.Zero | ~LowBits;
+ Known.One = Known2.One & LowBits;
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ uint32_t Leaders =
+ std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
+ Known.resetAll();
+ Known.Zero.setHighBits(Leaders);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
+ const unsigned Index = Op.getConstantOperandVal(1);
+ const unsigned EltBitWidth = Op.getValueSizeInBits();
+
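+    // e.g. extracting element 1 of an i64 viewed as two i32 halves keeps bits
+    // [63:32]: shift out the low 32 known bits, then truncate to 32 bits.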
+ // Remove low part of known bits mask
+ Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
+ Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
+
+    // Remove high part of known bits mask
+ Known = Known.trunc(EltBitWidth);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue EltNo = Op.getOperand(1);
+ EVT VecVT = InVec.getValueType();
+ const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+    // If BitWidth > EltBitWidth the value is any-extended, so we do not know
+    // anything about the extended bits.
+ if (BitWidth > EltBitWidth)
+ Known = Known.trunc(EltBitWidth);
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
+ // If we know the element index, just demand that vector element.
+ unsigned Idx = ConstEltNo->getZExtValue();
+ APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
+ Known = computeKnownBits(InVec, DemandedElt, Depth + 1);
+ } else {
+ // Unknown element index, so ignore DemandedElts and demand them all.
+ Known = computeKnownBits(InVec, Depth + 1);
+ }
+ if (BitWidth > EltBitWidth)
+ Known = Known.zext(BitWidth, false /* => any extend */);
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue InVal = Op.getOperand(1);
+ SDValue EltNo = Op.getOperand(2);
+
+ ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element.
+ Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth);
+ unsigned EltIdx = CEltNo->getZExtValue();
+
+ // If we demand the inserted element then add its common known bits.
+ if (DemandedElts[EltIdx]) {
+ Known2 = computeKnownBits(InVal, Depth + 1);
+ Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
+ Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
+ }
+
+ // If we demand the source vector then add its common known bits, ensuring
+ // that we don't demand the inserted element.
+ APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
+ if (!!VectorElts) {
+ Known2 = computeKnownBits(InVec, VectorElts, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ } else {
+ // Unknown element index, so ignore DemandedElts and demand them all.
+ Known = computeKnownBits(InVec, Depth + 1);
+ Known2 = computeKnownBits(InVal, Depth + 1);
+ Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
+ Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
+ }
+ break;
+ }
+ case ISD::BITREVERSE: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known.Zero = Known2.Zero.reverseBits();
+ Known.One = Known2.One.reverseBits();
+ break;
+ }
+ case ISD::BSWAP: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known.Zero = Known2.Zero.byteSwap();
+ Known.One = Known2.One.byteSwap();
+ break;
+ }
+ case ISD::ABS: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // If the source's MSB is zero then we know the rest of the bits already.
+ if (Known2.isNonNegative()) {
+ Known.Zero = Known2.Zero;
+ Known.One = Known2.One;
+ break;
+ }
+
+    // We only know that the absolute value's MSB will be zero iff there is
+ // a set bit that isn't the sign bit (otherwise it could be INT_MIN).
+ Known2.One.clearSignBit();
+ if (Known2.One.getBoolValue()) {
+ Known.Zero = APInt::getSignMask(BitWidth);
+ break;
+ }
+ break;
+ }
+ case ISD::UMIN: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // UMIN - we know that the result will have the maximum of the
+ // known zero leading bits of the inputs.
+ unsigned LeadZero = Known.countMinLeadingZeros();
+ LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros());
+
+ Known.Zero &= Known2.Zero;
+ Known.One &= Known2.One;
+ Known.Zero.setHighBits(LeadZero);
+ break;
+ }
+ case ISD::UMAX: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // UMAX - we know that the result will have the maximum of the
+ // known one leading bits of the inputs.
+ unsigned LeadOne = Known.countMinLeadingOnes();
+ LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes());
+
+ Known.Zero &= Known2.Zero;
+ Known.One &= Known2.One;
+ Known.One.setHighBits(LeadOne);
+ break;
+ }
+ case ISD::SMIN:
+ case ISD::SMAX: {
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
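+    // e.g. smin(smax(X, 0), 255) clamps X to [0, 255], so for i32 the top 24
+    // bits of the result are known zero.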
+ bool IsMax = (Opcode == ISD::SMAX);
+ ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+ if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
+ if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+ CstHigh =
+ isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
+ if (CstLow && CstHigh) {
+ if (!IsMax)
+ std::swap(CstLow, CstHigh);
+
+ const APInt &ValueLow = CstLow->getAPIntValue();
+ const APInt &ValueHigh = CstHigh->getAPIntValue();
+ if (ValueLow.sle(ValueHigh)) {
+ unsigned LowSignBits = ValueLow.getNumSignBits();
+ unsigned HighSignBits = ValueHigh.getNumSignBits();
+ unsigned MinSignBits = std::min(LowSignBits, HighSignBits);
+ if (ValueLow.isNegative() && ValueHigh.isNegative()) {
+ Known.One.setHighBits(MinSignBits);
+ break;
+ }
+ if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) {
+ Known.Zero.setHighBits(MinSignBits);
+ break;
+ }
+ }
+ }
+
+ // Fallback - just get the shared known bits of the operands.
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Known.isUnknown()) break; // Early-out
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known.Zero &= Known2.Zero;
+ Known.One &= Known2.One;
+ break;
+ }
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
+ break;
+
+ default:
+ if (Opcode < ISD::BUILTIN_OP_END)
+ break;
+ LLVM_FALLTHROUGH;
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // Allow the target to implement this method for its nodes.
+ TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
+ break;
+ }
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ return Known;
+}
+
+SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
+ SDValue N1) const {
+  // X + 0 never overflows.
+ if (isNullConstant(N1))
+ return OFK_Never;
+
+ KnownBits N1Known = computeKnownBits(N1);
+ if (N1Known.Zero.getBoolValue()) {
+ KnownBits N0Known = computeKnownBits(N0);
+
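+    // ~Known.Zero is an upper bound on each operand (any bit not known zero
+    // may be one); if even those maxima add without carrying out, the real
+    // addition can never overflow.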
+ bool overflow;
+ (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow);
+ if (!overflow)
+ return OFK_Never;
+ }
+
+  // mulhi + 1 never overflows.
+ if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
+ (~N1Known.Zero & 0x01) == ~N1Known.Zero)
+ return OFK_Never;
+
+ if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
+ KnownBits N0Known = computeKnownBits(N0);
+
+ if ((~N0Known.Zero & 0x01) == ~N0Known.Zero)
+ return OFK_Never;
+ }
+
+ return OFK_Sometime;
+}
+
+bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getScalarSizeInBits();
+
+ // Is the constant a known power of 2?
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val))
+ return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+
+ // A left-shift of a constant one will have exactly one bit set because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL) {
+ auto *C = isConstOrConstSplat(Val.getOperand(0));
+ if (C && C->getAPIntValue() == 1)
+ return true;
+ }
+
+ // Similarly, a logical right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL) {
+ auto *C = isConstOrConstSplat(Val.getOperand(0));
+ if (C && C->getAPIntValue().isSignMask())
+ return true;
+ }
+
+ // Are all operands of a build vector constant powers of two?
+ if (Val.getOpcode() == ISD::BUILD_VECTOR)
+ if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E))
+ return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+ return false;
+ }))
+ return true;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to computeKnownBits to catch other known cases.
+ KnownBits Known = computeKnownBits(Val);
+ return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
+}
+
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return ComputeNumSignBits(Op, DemandedElts, Depth);
+}
+
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!");
+ unsigned VTBits = VT.getScalarSizeInBits();
+ unsigned NumElts = DemandedElts.getBitWidth();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ const APInt &Val = C->getAPIntValue();
+ return Val.getNumSignBits();
+ }
+
+ if (Depth >= MaxRecursionDepth)
+ return 1; // Limit search depth.
+
+ if (!DemandedElts)
+ return 1; // No demanded elts, better to assume we don't know anything.
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp;
+
+ case ISD::BUILD_VECTOR:
+ Tmp = VTBits;
+ for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ SDValue SrcOp = Op.getOperand(i);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
+
+      // BUILD_VECTOR can implicitly truncate sources; we must handle this.
+ if (SrcOp.getValueSizeInBits() != VTBits) {
+ assert(SrcOp.getValueSizeInBits() > VTBits &&
+ "Expected BUILD_VECTOR implicit truncation");
+ unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
+ Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
+ }
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ return Tmp;
+
+ case ISD::VECTOR_SHUFFLE: {
+ // Collect the minimum number of sign bits that are shared by every vector
+ // element referenced by the shuffle.
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = SVN->getMaskElt(i);
+ if (!DemandedElts[i])
+ continue;
+ // For UNDEF elements, we don't know anything about the common state of
+ // the shuffle result.
+ if (M < 0)
+ return 1;
+ if ((unsigned)M < NumElts)
+ DemandedLHS.setBit((unsigned)M % NumElts);
+ else
+ DemandedRHS.setBit((unsigned)M % NumElts);
+ }
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedLHS)
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ if (!!DemandedRHS) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ // If we don't know anything, early out and try computeKnownBits fall-back.
+ if (Tmp == 1)
+ break;
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+
+ case ISD::BITCAST: {
+ SDValue N0 = Op.getOperand(0);
+ EVT SrcVT = N0.getValueType();
+ unsigned SrcBits = SrcVT.getScalarSizeInBits();
+
+    // Ignore bitcasts from unsupported types.
+ if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint()))
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ if (VTBits == SrcBits)
+ return ComputeNumSignBits(N0, DemandedElts, Depth + 1);
+
+ bool IsLE = getDataLayout().isLittleEndian();
+
+ // Bitcast 'large element' scalar/vector to 'small element' vector.
+ if ((SrcBits % VTBits) == 0) {
+ assert(VT.isVector() && "Expected bitcast to vector");
+
+ unsigned Scale = SrcBits / VTBits;
+ APInt SrcDemandedElts(NumElts / Scale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBit(i / Scale);
+
+ // Fast case - sign splat can be simply split across the small elements.
+ Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
+ if (Tmp == SrcBits)
+ return VTBits;
+
+ // Slow case - determine how far the sign extends into each sub-element.
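+      // e.g. bitcasting an i64 with 40 sign bits to v2i32 (little-endian):
+      // the high element keeps all 32 bits as sign bits, while the low
+      // element keeps 40 - 32 = 8.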
+ Tmp2 = VTBits;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned SubOffset = i % Scale;
+ SubOffset = (IsLE ? ((Scale - 1) - SubOffset) : SubOffset);
+ SubOffset = SubOffset * VTBits;
+ if (Tmp <= SubOffset)
+ return 1;
+ Tmp2 = std::min(Tmp2, Tmp - SubOffset);
+ }
+ return Tmp2;
+ }
+ break;
+ }
+
+ case ISD::SIGN_EXTEND:
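+    // e.g. sext i8 -> i32 appends 24 copies of the sign bit.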
+ Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
+ return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp;
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
+ Tmp = VTBits-Tmp+1;
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
+ return std::max(Tmp, Tmp2);
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements());
+ Tmp = VTBits - SrcVT.getScalarSizeInBits();
+ return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
+ }
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
+ // SRA X, C -> adds C sign bits.
+ if (ConstantSDNode *C =
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
+ APInt ShiftVal = C->getAPIntValue();
+ ShiftVal += Tmp;
+ Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C =
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
+ // shl destroys sign bits.
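+      // e.g. shifting a value with 10 sign bits left by 3 leaves 10-3 = 7.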
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
+ if (C->getAPIntValue().uge(VTBits) || // Bad shift.
+ C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // computeKnownBits, and pick whichever answer is better.
+ }
+ break;
+
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1);
+ return std::min(Tmp, Tmp2);
+ case ISD::SELECT_CC:
+ Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SMIN:
+ case ISD::SMAX: {
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ bool IsMax = (Opcode == ISD::SMAX);
+ ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+ if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
+ if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+ CstHigh =
+ isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
+ if (CstLow && CstHigh) {
+ if (!IsMax)
+ std::swap(CstLow, CstHigh);
+ if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) {
+ Tmp = CstLow->getAPIntValue().getNumSignBits();
+ Tmp2 = CstHigh->getAPIntValue().getNumSignBits();
+ return std::min(Tmp, Tmp2);
+ }
+ }
+
+ // Fallback - just get the minimum number of sign bits of the operands.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ return std::min(Tmp, Tmp2);
+ }
+ case ISD::UMIN:
+ case ISD::UMAX:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ return std::min(Tmp, Tmp2);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ // If setcc returns 0/-1, all bits are sign bits.
+ // We know that we have an integer-based boolean since these operations
+    // are only available for integer types.
+ if (TLI->getBooleanContents(VT.isVector(), false) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getAPIntValue().urem(VTBits);
+
+      // Handle rotate right by N like a rotate left by VTBits-N.
+ if (Opcode == ISD::ROTR)
+ RotAmt = (VTBits - RotAmt) % VTBits;
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
+ }
+ break;
+ case ISD::ADD:
+ case ISD::ADDC:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((Known.Zero | 1).isAllOnesValue())
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (Known.isNonNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
+ if (CLHS->isNullValue()) {
+ KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((Known.Zero | 1).isAllOnesValue())
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (Known.isNonNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ case ISD::MUL: {
+ // The output of the Mul can be at most twice the valid bits in the inputs.
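+    // e.g. multiplying two i32 values that each have 17 sign bits (16 valid
+    // bits) needs 16+16 = 32 valid bits, leaving just 1 guaranteed sign bit.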
+ unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (SignBitsOp0 == 1)
+ break;
+ unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ if (SignBitsOp1 == 1)
+ break;
+ unsigned OutValidBits =
+ (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
+ return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
+ }
+ case ISD::TRUNCATE: {
+ // Check if the sign bits of source go down as far as the truncated value.
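+    // e.g. truncating an i64 with 40 sign bits to i32 leaves
+    // 40 - (64-32) = 8 sign bits.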
+ unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
+ unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (NumSrcSignBits > (NumSrcBits - VTBits))
+ return NumSrcSignBits - (NumSrcBits - VTBits);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ const int BitWidth = Op.getValueSizeInBits();
+ const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth;
+
+ // Get reverse index (starting from 1), Op1 value indexes elements from
+ // little end. Sign starts at big end.
+ const int rIndex = Items - 1 - Op.getConstantOperandVal(1);
+
+    // If the sign portion ends in our element, the subtraction gives the
+    // correct result; otherwise it would be negative or exceed the bitwidth,
+    // hence the clamping below.
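+    // e.g. splitting an i64 with 40 sign bits into i32 halves: the high
+    // half (rIndex 0) has all 32 bits as sign bits, the low half (rIndex 1)
+    // has 40 - 32 = 8.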
+ return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue InVal = Op.getOperand(1);
+ SDValue EltNo = Op.getOperand(2);
+
+ ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element.
+ unsigned EltIdx = CEltNo->getZExtValue();
+
+ // If we demand the inserted element then get its sign bits.
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (DemandedElts[EltIdx]) {
+ // TODO - handle implicit truncation of inserted elements.
+ if (InVal.getScalarValueSizeInBits() != VTBits)
+ break;
+ Tmp = ComputeNumSignBits(InVal, Depth + 1);
+ }
+
+ // If we demand the source vector then get its sign bits, and determine
+ // the minimum.
+ APInt VectorElts = DemandedElts;
+ VectorElts.clearBit(EltIdx);
+ if (!!VectorElts) {
+ Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ } else {
+ // Unknown element index, so ignore DemandedElts and demand them all.
+ Tmp = ComputeNumSignBits(InVec, Depth + 1);
+ Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue EltNo = Op.getOperand(1);
+ EVT VecVT = InVec.getValueType();
+ const unsigned BitWidth = Op.getValueSizeInBits();
+ const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+
+    // If BitWidth > EltBitWidth the value is implicitly any-extended, and we
+    // do not know anything about the extended sign bits. But if the sizes
+    // match we can derive knowledge about the sign bits from the vector
+    // operand.
+ if (BitWidth != EltBitWidth)
+ break;
+
+ // If we know the element index, just demand that vector element, else for
+ // an unknown element index, ignore DemandedElts and demand them all.
+ APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts =
+ APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
+
+ return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ // If we know the element index, just demand that subvector elements,
+ // otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ }
+ return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
+ }
+ case ISD::CONCAT_VECTORS: {
+ // Determine the minimum number of sign bits across all demanded
+ // elts of the input vectors. Early out if the result is already 1.
+ Tmp = std::numeric_limits<unsigned>::max();
+ EVT SubVectorVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
+ unsigned NumSubVectors = Op.getNumOperands();
+ for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) {
+ APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
+ DemandedSub = DemandedSub.trunc(NumSubVectorElts);
+ if (!DemandedSub)
+ continue;
+ Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ // If we know the element index, demand any elements from the subvector and
+ // the remainder from the src its inserted into, otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
+ Tmp = std::numeric_limits<unsigned>::max();
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ if (!!DemandedSubElts) {
+ Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
+ if (Tmp == 1) return 1; // early-out
+ }
+ APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
+ APInt DemandedSrcElts = DemandedElts & ~SubMask;
+ if (!!DemandedSrcElts) {
+ Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+
+ // Not able to determine the index so just assume worst case.
+ Tmp = ComputeNumSignBits(Sub, Depth + 1);
+ if (Tmp == 1) return 1; // early-out
+ Tmp2 = ComputeNumSignBits(Src, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+ }
+
+ // If we are looking at the loaded value of the SDNode.
+ if (Op.getResNo() == 0) {
+    // Handle LOADX separately here. The EXTLOAD case will fall through.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known.
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
+ return VTBits - Tmp + 1;
+ case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known.
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
+ return VTBits - Tmp;
+ case ISD::NON_EXTLOAD:
+ if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) {
+ // We only need to handle vectors - computeKnownBits should handle
+ // scalar cases.
+ Type *CstTy = Cst->getType();
+ if (CstTy->isVectorTy() &&
+ (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) {
+ Tmp = VTBits;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (Constant *Elt = Cst->getAggregateElement(i)) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+ const APInt &Value = CInt->getValue();
+ Tmp = std::min(Tmp, Value.getNumSignBits());
+ continue;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Tmp = std::min(Tmp, Value.getNumSignBits());
+ continue;
+ }
+ }
+ // Unknown type. Conservatively assume no bits match sign bit.
+ return 1;
+ }
+ return Tmp;
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Opcode >= ISD::BUILTIN_OP_END ||
+ Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::INTRINSIC_VOID) {
+ unsigned NumBits =
+ TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
+ if (NumBits > 1)
+ FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ KnownBits Known = computeKnownBits(Op, DemandedElts, Depth);
+
+ APInt Mask;
+ if (Known.isNonNegative()) { // sign bit is 0
+ Mask = Known.Zero;
+ } else if (Known.isNegative()) { // sign bit is 1;
+ Mask = Known.One;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-VTBits;
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
+
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
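+  // An OR behaves like an ADD here only when the constant's set bits cannot
+  // overlap the possibly-set bits of the other operand.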
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1)))
+ return false;
+
+ return true;
+}
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
+ // If we're told that NaNs won't happen, assume they won't.
+ if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
+ return true;
+
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
+ // TODO: Handle vectors.
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
+ return !C->getValueAPF().isNaN() ||
+ (SNaN && !C->getValueAPF().isSignaling());
+ }
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ if (SNaN)
+ return true;
+ // TODO: Need isKnownNeverInfinity
+ return false;
+ }
+ case ISD::FCANONICALIZE:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FROUND:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::FABS:
+ case ISD::FNEG:
+ case ISD::FCOPYSIGN: {
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::SELECT:
+ return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return true;
+ case ISD::FMA:
+ case ISD::FMAD: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+  case ISD::FSQRT: // Would need to know the operand is non-negative.
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FPOWI:
+ case ISD::FPOW: {
+ if (SNaN)
+ return true;
+ // TODO: Refine on operand
+ return false;
+ }
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Only one needs to be known not-nan, since it will be returned if the
+ // other ends up being one.
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE: {
+ if (SNaN)
+ return true;
+ // This can return a NaN if either operand is an sNaN, or if both operands
+ // are NaN.
+ return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
+ (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
+ }
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+    // TODO: Does this quiet or return the original NaN as-is?
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ default:
+ if (Opcode >= ISD::BUILTIN_OP_END ||
+ Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::INTRINSIC_VOID) {
+ return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
+ }
+
+ return false;
+ }
+}
+
+bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
+ assert(Op.getValueType().isFloatingPoint() &&
+ "Floating point type expected");
+
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ // TODO: Add BuildVector support.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ assert(!Op.getValueType().isFloatingPoint() &&
+ "Floating point types unsupported - use isKnownNeverZeroFloat");
+
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (ISD::matchUnaryPredicate(
+ Op, [](ConstantSDNode *C) { return !C->isNullValue(); }))
+ return true;
+
+ // TODO: Recognize more cases here.
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ if (isKnownNeverZero(Op.getOperand(1)) ||
+ isKnownNeverZero(Op.getOperand(0)))
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+  // Check for negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
+// FIXME: unify with llvm::haveNoCommonBitsSet.
+// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M)
+bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
+ assert(A.getValueType() == B.getValueType() &&
+ "Values must have the same type");
+ return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
+}
+
+static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
+ SelectionDAG &DAG) {
+ int NumOps = Ops.size();
+ assert(NumOps != 0 && "Can't build an empty vector!");
+ assert(VT.getVectorNumElements() == (unsigned)NumOps &&
+ "Incorrect element count in BUILD_VECTOR!");
+
+ // BUILD_VECTOR of UNDEFs is UNDEF.
+ if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
+ return DAG.getUNDEF(VT);
+
+  // A BUILD_VECTOR of sequential extracts from one same-typed source vector
+  // is the identity, i.e. the source vector itself.
+ SDValue IdentitySrc;
+ bool IsIdentity = true;
+ for (int i = 0; i != NumOps; ++i) {
+ if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Ops[i].getOperand(0).getValueType() != VT ||
+ (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) ||
+ !isa<ConstantSDNode>(Ops[i].getOperand(1)) ||
+ cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i) {
+ IsIdentity = false;
+ break;
+ }
+ IdentitySrc = Ops[i].getOperand(0);
+ }
+ if (IsIdentity)
+ return IdentitySrc;
+
+ return SDValue();
+}
+
+/// Try to simplify vector concatenation to an input value, undef, or build
+/// vector.
+static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
+ SelectionDAG &DAG) {
+ assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
+ assert(llvm::all_of(Ops,
+ [Ops](SDValue Op) {
+ return Ops[0].getValueType() == Op.getValueType();
+ }) &&
+ "Concatenation of vectors with inconsistent value types!");
+ assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) ==
+ VT.getVectorNumElements() &&
+ "Incorrect element count in vector concatenation!");
+
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ // Concat of UNDEFs is UNDEF.
+ if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
+ return DAG.getUNDEF(VT);
+
+ // Scan the operands and look for extract operations from a single source
+ // that correspond to insertion at the same location via this concatenation:
+ // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ...
+ SDValue IdentitySrc;
+ bool IsIdentity = true;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ SDValue Op = Ops[i];
+ unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements();
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Op.getOperand(0).getValueType() != VT ||
+ (IdentitySrc && Op.getOperand(0) != IdentitySrc) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)) ||
+ Op.getConstantOperandVal(1) != IdentityIndex) {
+ IsIdentity = false;
+ break;
+ }
+ assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) &&
+ "Unexpected identity source vector for concat of extracts");
+ IdentitySrc = Op.getOperand(0);
+ }
+ if (IsIdentity) {
+ assert(IdentitySrc && "Failed to set source vector of extracts");
+ return IdentitySrc;
+ }
+
+  // A CONCAT_VECTORS with all UNDEF/BUILD_VECTOR operands can be
+ // simplified to one big BUILD_VECTOR.
+ // FIXME: Add support for SCALAR_TO_VECTOR as well.
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 16> Elts;
+ for (SDValue Op : Ops) {
+ EVT OpVT = Op.getValueType();
+ if (Op.isUndef())
+ Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT));
+ else if (Op.getOpcode() == ISD::BUILD_VECTOR)
+ Elts.append(Op->op_begin(), Op->op_end());
+ else
+ return SDValue();
+ }
+
+  // BUILD_VECTOR requires all inputs to be of the same type; find the
+  // maximum type and extend them all.
+ for (SDValue Op : Elts)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+
+ if (SVT.bitsGT(VT.getScalarType()))
+ for (SDValue &Op : Elts)
+ Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, DL, SVT)
+ : DAG.getSExtOrTrunc(Op, DL, SVT);
+
+ SDValue V = DAG.getBuildVector(VT, DL, Elts);
+ NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG);
+ return V;
+}
+
+/// Gets or creates the specified node.
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), None);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue Operand, const SDNodeFlags Flags) {
+ // Constant fold unary operations with an integer constant operand. Even
+  // opaque constants will be folded, because the folding of unary operations
+ // doesn't create new constants with different values. Nevertheless, the
+ // opaque flag is preserved during folding to prevent future folding with
+ // other constants.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) {
+ const APInt &Val = C->getAPIntValue();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND:
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::TRUNCATE:
+ if (C->isOpaque())
+ break;
+ LLVM_FALLTHROUGH;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
+ (void)apf.convertFromAPInt(Val,
+ Opcode==ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, DL, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
+ return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
+ if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
+ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
+ return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
+ break;
+ case ISD::ABS:
+ return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BITREVERSE:
+ return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTPOP:
+ return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::FP16_TO_FP: {
+ bool Ignored;
+ APFloat FPV(APFloat::IEEEhalf(),
+ (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
+
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)FPV.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstantFP(FPV, DL, VT);
+ }
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) {
+ APFloat V = C->getValueAPF(); // make copy
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ break;
+ }
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, DL, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ bool ignored;
+ APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s =
+ V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
+ if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ return getConstant(IntVal, DL, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
+ else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+ break;
+ case ISD::FP_TO_FP16: {
+ bool Ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(APFloat::IEEEhalf(),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstant(V.bitcastToAPInt(), DL, VT);
+ }
+ }
+ }
+
+ // Constant fold unary operations with a vector integer or float operand.
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) {
+ if (BV->isConstant()) {
+ switch (Opcode) {
+ default:
+ // FIXME: Entirely reasonable to perform folding of other unary
+ // operations here as the need arises.
+ break;
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::TRUNCATE:
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::ABS:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP: {
+ SDValue Ops = { Operand };
+ if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ return Fold;
+ }
+ }
+ }
+ }
+
+ unsigned OpOpcode = Operand.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ case ISD::MERGE_VALUES:
+ case ISD::CONCAT_VECTORS:
+ return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {Operand};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+ case ISD::FP_EXTEND:
+ assert(VT.isFloatingPoint() &&
+ Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+ if (Operand.getValueType() == VT) return Operand; // noop conversion.
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid fpext node, dst < src!");
+ if (Operand.isUndef())
+ return getUNDEF(VT);
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (Operand.isUndef())
+ return getUNDEF(VT);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (Operand.isUndef())
+ return getConstantFP(0.0, DL, VT);
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid SIGN_EXTEND!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+ "SIGN_EXTEND result type type should be vector iff the operand "
+ "type is vector!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid sext node, dst < src!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // sext(undef) = 0, because the top bits will all be the same.
+ return getConstant(0, DL, VT);
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ZERO_EXTEND!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+ "ZERO_EXTEND result type type should be vector iff the operand "
+ "type is vector!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid zext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // zext(undef) = 0, because the top bits will be zero.
+ return getConstant(0, DL, VT);
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+ "ANY_EXTEND result type type should be vector iff the operand "
+ "type is vector!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid anyext node, dst < src!");
+
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // (ext (trunc x)) -> x
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = Operand.getOperand(0);
+ if (OpOp.getValueType() == VT) {
+ transferDbgValues(Operand, OpOp);
+ return OpOp;
+ }
+ }
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+ "TRUNCATE result type type should be vector iff the operand "
+ "type is vector!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Operand.getValueType().bitsGT(VT) &&
+ "Invalid truncate node, src < dst!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.getOperand(0).getValueType().getScalarType()
+ .bitsLT(VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
+ if (Operand.getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
+ return Operand.getOperand(0);
+ }
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(Operand.getValueType().bitsLE(VT) &&
+ "The input must be the same size or smaller than the result.");
+ assert(VT.getVectorNumElements() <
+ Operand.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ break;
+ case ISD::ABS:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid ABS!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::BSWAP:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid BSWAP!");
+ assert((VT.getScalarSizeInBits() % 16 == 0) &&
+ "BSWAP types must be a multiple of 16 bits!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::BITREVERSE:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid BITREVERSE!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::BITCAST:
+ // Basic sanity checking.
+ assert(VT.getSizeInBits() == Operand.getValueSizeInBits() &&
+ "Cannot BITCAST between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !Operand.getValueType().isVector() &&
+ (VT.getVectorElementType() == Operand.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ Operand.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Operand.getOperand(1)) &&
+ Operand.getConstantOperandVal(1) == 0 &&
+ Operand.getOperand(0).getValueType() == VT)
+ return Operand.getOperand(0);
+ break;
+ case ISD::FNEG:
+ // Negation of an unknown bag of bits is still completely undefined.
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
+    // -(X-Y) -> (Y-X) is only safe when signed zeros can be ignored,
+    // because when X==Y, -0.0 != +0.0.
+ if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
+ OpOpcode == ISD::FSUB)
+ return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
+ Operand.getOperand(0), Flags);
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return Operand.getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {Operand};
+ if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
+ return SDValue(E, 0);
+ }
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
+ const APInt &C2) {
+ switch (Opcode) {
+ case ISD::ADD: return std::make_pair(C1 + C2, true);
+ case ISD::SUB: return std::make_pair(C1 - C2, true);
+ case ISD::MUL: return std::make_pair(C1 * C2, true);
+ case ISD::AND: return std::make_pair(C1 & C2, true);
+ case ISD::OR: return std::make_pair(C1 | C2, true);
+ case ISD::XOR: return std::make_pair(C1 ^ C2, true);
+ case ISD::SHL: return std::make_pair(C1 << C2, true);
+ case ISD::SRL: return std::make_pair(C1.lshr(C2), true);
+ case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
+ case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
+ case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
+ case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true);
+ case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
+ case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
+ case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
+ case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true);
+ case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true);
+ case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true);
+ case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true);
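+  // Division or remainder by zero cannot be folded; breaking out of the
+  // switch yields the "no fold" result below.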
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.udiv(C2), true);
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.urem(C2), true);
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.sdiv(C2), true);
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.srem(C2), true);
+ }
+ return std::make_pair(APInt(1, 0), false);
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
+ EVT VT, const ConstantSDNode *C1,
+ const ConstantSDNode *C2) {
+ if (C1->isOpaque() || C2->isOpaque())
+ return SDValue();
+
+ std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(),
+ C2->getAPIntValue());
+ if (!Folded.second)
+ return SDValue();
+ return getConstant(Folded.first, DL, VT);
+}
+
+SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
+ const GlobalAddressSDNode *GA,
+ const SDNode *N2) {
+ if (GA->getOpcode() != ISD::GlobalAddress)
+ return SDValue();
+ if (!TLI->isOffsetFoldingLegal(GA))
+ return SDValue();
+ auto *C2 = dyn_cast<ConstantSDNode>(N2);
+ if (!C2)
+ return SDValue();
+ int64_t Offset = C2->getSExtValue();
+ switch (Opcode) {
+ case ISD::ADD: break;
+ case ISD::SUB: Offset = -uint64_t(Offset); break;
+ default: return SDValue();
+ }
+ return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT,
+ GA->getOffset() + uint64_t(Offset));
+}
+
+bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
+ switch (Opcode) {
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: {
+ // If a divisor is zero/undef or any element of a divisor vector is
+ // zero/undef, the whole op is undef.
+ assert(Ops.size() == 2 && "Div/rem should have 2 operands");
+ SDValue Divisor = Ops[1];
+ if (Divisor.isUndef() || isNullConstant(Divisor))
+ return true;
+
+ return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
+ llvm::any_of(Divisor->op_values(),
+ [](SDValue V) { return V.isUndef() ||
+ isNullConstant(V); });
+ // TODO: Handle signed overflow.
+ }
+ // TODO: Handle oversized shifts.
+ default:
+ return false;
+ }
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDNode *N1, SDNode *N2) {
+ // If the opcode is a target-specific ISD node, there's nothing we can
+ // do here and the operand rules may not line up with the below, so
+ // bail early.
+ if (Opcode >= ISD::BUILTIN_OP_END)
+ return SDValue();
+
+ if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)}))
+ return getUNDEF(VT);
+
+ // Handle the case of two scalars.
+ if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
+ if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
+ SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2);
+ assert((!Folded || !VT.isVector()) &&
+ "Can't fold vectors ops with scalar operands");
+ return Folded;
+ }
+ }
+
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
+ return FoldSymbolOffset(Opcode, VT, GA, N2);
+ if (TLI->isCommutativeBinOp(Opcode))
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
+ return FoldSymbolOffset(Opcode, VT, GA, N1);
+
+ // For vectors, extract each constant element and fold them individually.
+ // Either input may be an undef value.
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ if (!BV1 && !N1->isUndef())
+ return SDValue();
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+ if (!BV2 && !N2->isUndef())
+ return SDValue();
+ // If both operands are undef, that's handled the same way as scalars.
+ if (!BV1 && !BV2)
+ return SDValue();
+
+ assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) &&
+ "Vector binop with different number of elements in operands?");
+
+ EVT SVT = VT.getScalarType();
+ EVT LegalSVT = SVT;
+ if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(SVT))
+ return SDValue();
+ }
+ SmallVector<SDValue, 4> Outputs;
+ unsigned NumOps = BV1 ? BV1->getNumOperands() : BV2->getNumOperands();
+ for (unsigned I = 0; I != NumOps; ++I) {
+ SDValue V1 = BV1 ? BV1->getOperand(I) : getUNDEF(SVT);
+ SDValue V2 = BV2 ? BV2->getOperand(I) : getUNDEF(SVT);
+ if (SVT.isInteger()) {
+ if (V1->getValueType(0).bitsGT(SVT))
+ V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
+ if (V2->getValueType(0).bitsGT(SVT))
+ V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+ }
+
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ return SDValue();
+
+ // Fold one vector element.
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
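+    // Promote the folded result to the legal scalar type if necessary.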
+ if (LegalSVT != SVT)
+ ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
+
+ // Scalar folding only succeeded if the result is a constant or UNDEF.
+ if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
+ ScalarResult.getOpcode() != ISD::ConstantFP)
+ return SDValue();
+ Outputs.push_back(ScalarResult);
+ }
+
+ assert(VT.getVectorNumElements() == Outputs.size() &&
+ "Vector size mismatch!");
+
+ // We may have a vector type but a scalar result. Create a splat.
+ Outputs.resize(VT.getVectorNumElements(), Outputs.back());
+
+ // Build a big vector out of the scalar elements we generated.
+ return getBuildVector(VT, SDLoc(), Outputs);
+}
+
+// TODO: Merge with FoldConstantArithmetic
+SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
+ const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
+ const SDNodeFlags Flags) {
+ // If the opcode is a target-specific ISD node, there's nothing we can
+ // do here and the operand rules may not line up with the below, so
+ // bail early.
+ if (Opcode >= ISD::BUILTIN_OP_END)
+ return SDValue();
+
+ if (isUndef(Opcode, Ops))
+ return getUNDEF(VT);
+
+ // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+ if (!VT.isVector())
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
+ return !Op.getValueType().isVector() ||
+ Op.getValueType().getVectorNumElements() == NumElts;
+ };
+
+ auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+ return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) ||
+ (BV && BV->isConstant());
+ };
+
+ // All operands must be vector types with the same number of elements as
+ // the result type and must be either UNDEF or a build vector of constant
+ // or UNDEF scalars.
+ if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) ||
+ !llvm::all_of(Ops, IsScalarOrSameVectorSize))
+ return SDValue();
+
+  // If we are comparing vectors, then the result needs to be an i1 boolean
+ // that is then sign-extended back to the legal result type.
+ EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
+
+ // Find legal integer scalar type for constant promotion and
+ // ensure that its scalar size is at least as large as source.
+ EVT LegalSVT = VT.getScalarType();
+ if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(VT.getScalarType()))
+ return SDValue();
+ }
+
+ // Constant fold each scalar lane separately.
+ SmallVector<SDValue, 4> ScalarResults;
+ for (unsigned i = 0; i != NumElts; i++) {
+ SmallVector<SDValue, 4> ScalarOps;
+ for (SDValue Op : Ops) {
+ EVT InSVT = Op.getValueType().getScalarType();
+ BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
+ if (!InBV) {
+ // We've checked that this is UNDEF or a constant of some kind.
+ if (Op.isUndef())
+ ScalarOps.push_back(getUNDEF(InSVT));
+ else
+ ScalarOps.push_back(Op);
+ continue;
+ }
+
+ SDValue ScalarOp = InBV->getOperand(i);
+ EVT ScalarVT = ScalarOp.getValueType();
+
+ // Build vector (integer) scalar operands may need implicit
+ // truncation - do this before constant folding.
+ if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT))
+ ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
+
+ ScalarOps.push_back(ScalarOp);
+ }
+
+ // Constant fold the scalar operands.
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
+
+ // Legalize the (integer) scalar constant if necessary.
+ if (LegalSVT != SVT)
+ ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
+
+ // Scalar folding only succeeded if the result is a constant or UNDEF.
+ if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
+ ScalarResult.getOpcode() != ISD::ConstantFP)
+ return SDValue();
+ ScalarResults.push_back(ScalarResult);
+ }
+
+ SDValue V = getBuildVector(VT, DL, ScalarResults);
+ NewSDValueDbgMsg(V, "New node fold constant vector: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDValue N1, SDValue N2) {
+ // TODO: We don't do any constant folding for strict FP opcodes here, but we
+ // should. That will require dealing with a potentially non-default
+ // rounding mode, checking the "opStatus" return value from the APFloat
+ // math calculations, and possibly other variations.
+ auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+ auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ if (N1CFP && N2CFP) {
+ APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF();
+ switch (Opcode) {
+ case ISD::FADD:
+ C1.add(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FSUB:
+ C1.subtract(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FMUL:
+ C1.multiply(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FDIV:
+ C1.divide(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FREM:
+ C1.mod(C2);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FCOPYSIGN:
+ C1.copySign(C2);
+ return getConstantFP(C1, DL, VT);
+ default: break;
+ }
+ }
+ if (N1CFP && Opcode == ISD::FP_ROUND) {
+ APFloat C1 = N1CFP->getValueAPF(); // make copy
+ bool Unused;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void) C1.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &Unused);
+ return getConstantFP(C1, DL, VT);
+ }
+
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ // If both operands are undef, the result is undef. If 1 operand is undef,
+ // the result is NaN. This should match the behavior of the IR optimizer.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+ if (N1.isUndef() || N2.isUndef())
+ return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+ }
+ return SDValue();
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, const SDNodeFlags Flags) {
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+
+ // Canonicalize constant to RHS if commutative.
+ if (TLI->isCommutativeBinOp(Opcode)) {
+ if (N1C && !N2C) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ } else if (N1CFP && !N2CFP) {
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ }
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ if (N1 == N2) return N1;
+ break;
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1, N2};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ SDValue Ops[] = {N1, N2};
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::AND:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N2;
+ if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (SDValue V = simplifyFPBinop(Opcode, N1, N2))
+ return V;
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ if (SDValue V = simplifyShift(N1, N2))
+ return V;
+ LLVM_FALLTHROUGH;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(VT == N1.getValueType() &&
+ "Shift operators return type must be the same as their first arg");
+ assert(VT.isInteger() && N2.getValueType().isInteger() &&
+ "Shifts only work on integers");
+ assert((!VT.isVector() || VT == N2.getValueType()) &&
+ "Vector shift amounts must be in the same as their first arg");
+ // Verify that the shift amount VT is big enough to hold valid shift
+ // amounts. This catches things like trying to shift an i1024 value by an
+ // i8, which is easy to fall into in generic code that uses
+ // TLI.getShiftAmount().
+ assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
+ if (VT == MVT::i1)
+ return N1;
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::FP_ROUND:
+ assert(VT.isFloatingPoint() &&
+ N1.getValueType().isFloatingPoint() &&
+ VT.bitsLE(N1.getValueType()) &&
+ N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) &&
+ "Invalid FP_ROUND!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(!EVT.isVector() &&
+ "AssertSExt/AssertZExt type should be the vector element type "
+ "rather than the vector type!");
+ assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
+ if (VT.getScalarType() == EVT) return N1; // noop assertion.
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "SIGN_EXTEND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in SIGN_EXTEND_INREG");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (EVT == VT) return N1; // Not actually extending
+
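+ // Shift the value up so the top bit of the narrow type becomes the sign
+ // bit, then arithmetic-shift back down to replicate it; e.g. i16 0x0080
+ // sign-extended in place from i8 becomes 0xFF80.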
+ auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) {
+ unsigned FromBits = EVT.getScalarSizeInBits();
+ Val <<= Val.getBitWidth() - FromBits;
+ Val.ashrInPlace(Val.getBitWidth() - FromBits);
+ return getConstant(Val, DL, ConstantVT);
+ };
+
+ if (N1C) {
+ const APInt &Val = N1C->getAPIntValue();
+ return SignExtendInReg(Val, VT);
+ }
+ if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
+ SmallVector<SDValue, 8> Ops;
+ llvm::EVT OpVT = N1.getOperand(0).getValueType();
+ for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ SDValue Op = N1.getOperand(i);
+ if (Op.isUndef()) {
+ Ops.push_back(getUNDEF(OpVT));
+ continue;
+ }
+ ConstantSDNode *C = cast<ConstantSDNode>(Op);
+ APInt Val = C->getAPIntValue();
+ Ops.push_back(SignExtendInReg(Val, OpVT));
+ }
+ return getBuildVector(VT, DL, Ops);
+ }
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&
+ "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
+ element type of the vector.");
+
+ // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+ if (N1.isUndef())
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
+ if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers.
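+ // e.g. (extract_elt (concat v2i32 A, v2i32 B), 3) -> (extract_elt B, 1).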
+ if (N2C &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0) {
+ unsigned Factor =
+ N1.getOperand(0).getValueType().getVectorNumElements();
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ N1.getOperand(N2C->getZExtValue() / Factor),
+ getConstant(N2C->getZExtValue() % Factor, DL,
+ N2.getValueType()));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+ // expanding large vector constants.
+ if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue Elt = N1.getOperand(N2C->getZExtValue());
+
+ if (VT != Elt.getValueType())
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated, and the result implicitly
+ // extended. Make that explicit here.
+ Elt = getAnyExtOrTrunc(Elt, DL, VT);
+
+ return Elt;
+ }
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ // If the indices are the same, return the inserted element; if the
+ // indices are known to differ, extract the element from the original
+ // vector.
+ SDValue N1Op2 = N1.getOperand(2);
+ ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2);
+
+ if (N1Op2C && N2C) {
+ if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+ if (VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ else
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ }
+
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
+ }
+
+ // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
+ // when vector types are scalarized and v1iX is legal.
+ // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx)
+ if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getValueType().getVectorNumElements() == 1) {
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
+ N1.getOperand(1));
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+ assert(!N1.getValueType().isVector() && !VT.isVector() &&
+ (N1.getValueType().isInteger() == VT.isInteger()) &&
+ N1.getValueType() != VT &&
+ "Wrong types for EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getZExtValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
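+ // e.g. extracting element 1 of i64 0x1122334455667788 as an i32 yields
+ // 0x11223344 (the high half).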
+ if (N1C) {
+ unsigned ElementSize = VT.getSizeInBits();
+ unsigned Shift = ElementSize * N2C->getZExtValue();
+ APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift);
+ return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ if (VT.isSimple() && N1.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Extract subvector VTs must be a vectors!");
+ assert(VT.getVectorElementType() ==
+ N1.getValueType().getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
+ "Extract subvector must be from larger vector to smaller vector!");
+
+ if (N2C) {
+ assert((VT.getVectorNumElements() + N2C->getZExtValue()
+ <= N1.getValueType().getVectorNumElements())
+ && "Extract subvector overflow!");
+ }
+
+ // Trivial extraction.
+ if (VT.getSimpleVT() == N1.getSimpleValueType())
+ return N1;
+
+ // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
+ if (N1.isUndef())
+ return getUNDEF(VT);
+
+ // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
+ // the concat have the same type as the extract.
+ if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0 &&
+ VT == N1.getOperand(0).getValueType()) {
+ unsigned Factor = VT.getVectorNumElements();
+ return N1.getOperand(N2C->getZExtValue() / Factor);
+ }
+
+ // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
+ // during shuffle legalization.
+ if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
+ VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ }
+ break;
+ }
+
+ // Perform trivial constant folding.
+ if (SDValue SV =
+ FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
+ return SV;
+
+ if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2))
+ return V;
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.isUndef()) {
+ if (TLI->isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ return getUNDEF(VT); // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.isUndef()) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.isUndef())
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, DL, VT);
+ LLVM_FALLTHROUGH;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return getUNDEF(VT); // fold op(arg1, undef) -> undef
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
+ case ISD::OR:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ return getAllOnesConstant(DL, VT);
+ }
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {N1, N2};
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
+ return SDValue(E, 0);
+ }
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ const SDNodeFlags Flags) {
+ // Perform various simplifications.
+ switch (Opcode) {
+ case ISD::FMA: {
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == VT && N2.getValueType() == VT &&
+ N3.getValueType() == VT && "FMA types must match!");
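+ // Constant-fold a fully constant FMA, e.g. fma(2.0, 3.0, 4.0) -> 10.0.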
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+ ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
+ if (N1CFP && N2CFP && N3CFP) {
+ APFloat V1 = N1CFP->getValueAPF();
+ const APFloat &V2 = N2CFP->getValueAPF();
+ const APFloat &V3 = N3CFP->getValueAPF();
+ V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+ return getConstantFP(V1, DL, VT);
+ }
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::SETCC: {
+ assert(VT.isInteger() && "SETCC result type must be an integer!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ "SETCC operands must have the same type!");
+ assert(VT.isVector() == N1.getValueType().isVector() &&
+ "SETCC type should be vector iff the operand type is vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) &&
+ "SETCC vector element counts must match!");
+ // Use FoldSetCC to simplify SETCC's.
+ if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
+ return V;
+ // Vector constant folding.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+ NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
+ return V;
+ }
+ break;
+ }
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ if (SDValue V = simplifySelect(N1, N2, N3))
+ return V;
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::INSERT_VECTOR_ELT: {
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
+ // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
+ if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ return getUNDEF(VT);
+ break;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ // Inserting undef into undef is still undef.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+ SDValue Index = N3;
+ if (VT.isSimple() && N1.getValueType().isSimple()
+ && N2.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ N2.getValueType().isVector() &&
+ "Insert subvector VTs must be a vectors");
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ if (isa<ConstantSDNode>(Index)) {
+ assert((N2.getValueType().getVectorNumElements() +
+ cast<ConstantSDNode>(Index)->getZExtValue()
+ <= VT.getVectorNumElements())
+ && "Insert subvector overflow!");
+ }
+
+ // Trivial insertion.
+ if (VT.getSimpleVT() == N2.getSimpleValueType())
+ return N2;
+
+ // If this is an insert of an extracted vector into an undef vector, we
+ // can just use the input to the extract.
+ if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
+ return N2.getOperand(0);
+ }
+ break;
+ }
+ case ISD::BITCAST:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {N1, N2, N3};
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
+ return SDValue(E, 0);
+ }
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ InsertNode(N);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VT, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+ SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VT, Ops);
+}
+
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
+ SmallVector<SDValue, 8> ArgChains;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each stack argument.
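+ // Incoming stack arguments are fixed frame objects, which have negative
+ // frame indices; hence the FI->getIndex() < 0 test below.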
+ for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
+ UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0)
+ ArgChains.push_back(SDValue(L, 1));
+
+ // Build a tokenfactor for all the chains.
+ return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
+}
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
+ const SDLoc &dl) {
+ assert(!Value.isUndef());
+
+ unsigned NumBits = VT.getScalarSizeInBits();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ assert(C->getAPIntValue().getBitWidth() == 8);
+ APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
+ if (VT.isInteger()) {
+ bool IsOpaque = VT.getSizeInBits() > 64 ||
+ !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue());
+ return DAG.getConstant(Val, dl, VT, false, IsOpaque);
+ }
+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
+ VT);
+ }
+
+ assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?");
+ EVT IntVT = VT.getScalarType();
+ if (!IntVT.isInteger())
+ IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits());
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value);
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
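+ // e.g. for a 32-bit value, fill byte 0xAB * 0x01010101 -> 0xABABABAB.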
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ Value = DAG.getNode(ISD::MUL, dl, IntVT, Value,
+ DAG.getConstant(Magic, dl, IntVT));
+ }
+
+ if (VT != Value.getValueType() && !VT.isInteger())
+ Value = DAG.getBitcast(VT.getScalarType(), Value);
+ if (VT != Value.getValueType())
+ Value = DAG.getSplatBuildVector(VT, dl, Value);
+
+ return Value;
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is turned into a memset because the source is a constant
+/// string pointer.
+static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const ConstantDataArraySlice &Slice) {
+ // Handle vector with all elements zero.
+ if (Slice.Array == nullptr) {
+ if (VT.isInteger())
+ return DAG.getConstant(0, dl, VT);
+ else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
+ return DAG.getConstantFP(0.0, dl, VT);
+ else if (VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getConstant(0, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElts)));
+ } else
+ llvm_unreachable("Expected type!");
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumVTBits = VT.getSizeInBits();
+ unsigned NumVTBytes = NumVTBits / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length));
+
+ APInt Val(NumVTBits, 0);
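+ // Pack the source bytes into Val in target byte order; e.g. "abcd" on a
+ // little-endian target becomes 0x64636261.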
+ if (DAG.getDataLayout().isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Slice[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8;
+ }
+
+ // If the "cost" of materializing the integer immediate is less than the cost
+ // of a load, then it is cost effective to turn the load into the immediate.
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
+ return DAG.getConstant(Val, dl, VT);
+ return SDValue(nullptr, 0);
+}
+
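+/// Returns Base + Offset, materialized as an ISD::ADD in the base pointer's
+/// value type.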
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ const SDLoc &DL) {
+ EVT VT = Base.getValueType();
+ return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT));
+}
+
+/// Returns true if memcpy source is constant data.
+static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
+ uint64_t SrcDelta = 0;
+ GlobalAddressSDNode *G = nullptr;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ return getConstantDataArrayInfo(G->getGlobal(), Slice, 8,
+ SrcDelta + G->getOffset());
+}
+
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction().hasMinSize();
+ return MF.getFunction().hasOptSize();
+}
+
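+/// Re-emit the stores in [From, To) with the TokenFactor of their matching
+/// load chains as the new store chain, appending the results to OutChains.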
+static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+ SmallVector<SDValue, 32> &OutChains, unsigned From,
+ unsigned To, SmallVector<SDValue, 16> &OutLoadChains,
+ SmallVector<SDValue, 16> &OutStoreChains) {
+ assert(OutLoadChains.size() && "Missing loads in memcpy inlining");
+ assert(OutStoreChains.size() && "Missing stores in memcpy inlining");
+ SmallVector<SDValue, 16> GluedLoadChains;
+ for (unsigned i = From; i < To; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ GluedLoadChains.push_back(OutLoadChains[i]);
+ }
+
+ // Chain for all loads.
+ SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ GluedLoadChains);
+
+ for (unsigned i = From; i < To; ++i) {
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]);
+ SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ OutChains.push_back(NewStore);
+ }
+}
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, unsigned Alignment,
+ bool isVol, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Turn a memcpy of undef into a nop.
+ // FIXME: We need to honor volatile even if Src is undef.
+ if (Src.isUndef())
+ return Chain;
+
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ // TODO: In the AlwaysInline case, if the size is big, generate a loop
+ // rather than a potentially humongous number of loads and stores.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Alignment > SrcAlign)
+ SrcAlign = Alignment;
+ ConstantDataArraySlice Slice;
+ bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
+ bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment),
+ (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
+ /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
+ /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Alignment &&
+ DL.exceedsNaturalStackAlignment(Align(NewAlign)))
+ NewAlign /= 2;
+
+ if (NewAlign > Alignment) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
+ Alignment = NewAlign;
+ }
+ }
+
+ MachineMemOperand::Flags MMOFlags =
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+ SmallVector<SDValue, 16> OutLoadChains;
+ SmallVector<SDValue, 16> OutStoreChains;
+ SmallVector<SDValue, 32> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ uint64_t SrcOff = 0, DstOff = 0;
+ for (unsigned i = 0; i != NumMemOps; ++i) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
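+ // e.g. a 7-byte copy done as two i32 pairs: the second pair starts at
+ // offset 3 rather than 4, overlapping the first by one byte.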
+ assert(i == NumMemOps-1 && i != 0);
+ SrcOff -= VTSize - Size;
+ DstOff -= VTSize - Size;
+ }
+
+ if (CopyFromConstant &&
+ (isZeroConstant || (VT.isInteger() && !VT.isVector()))) {
+ // It's unlikely a store of a vector immediate can be done in a single
+ // instruction. It would require a load from a constant pool first.
+ // We only handle zero vectors here.
+ // FIXME: Handle other cases where store of vector immediate is done in
+ // a single instruction.
+ ConstantDataArraySlice SubSlice;
+ if (SrcOff < Slice.Length) {
+ SubSlice = Slice;
+ SubSlice.move(SrcOff);
+ } else {
+ // This is an out-of-bounds access and hence UB. Pretend we read zero.
+ SubSlice.Array = nullptr;
+ SubSlice.Offset = 0;
+ SubSlice.Length = VTSize;
+ }
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
+ if (Value.getNode()) {
+ Store = DAG.getStore(
+ Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
+ OutChains.push_back(Store);
+ }
+ }
+
+ if (!Store.getNode()) {
+ // The type might not be legal for the target. This should only happen
+ // if the type is smaller than a legal type, as on PPC, so the right
+ // thing to do is generate a LoadExt/StoreTrunc pair. These simplify
+ // to Load/Store if NVT==VT.
+ // FIXME: Does the case above also need this?
+ EVT NVT = TLI.getTypeToTransformTo(C, VT);
+ assert(NVT.bitsGE(VT));
+
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+ DAG.getMemBasePlusOffset(Src, SrcOff, dl),
+ SrcPtrInfo.getWithOffset(SrcOff), VT,
+ MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
+ OutLoadChains.push_back(Value.getValue(1));
+
+ Store = DAG.getTruncStore(
+ Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
+ OutStoreChains.push_back(Store);
+ }
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ Size -= VTSize;
+ }
+
+ unsigned GluedLdStLimit = MaxLdStGlue == 0 ?
+ TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue;
+ unsigned NumLdStInMemcpy = OutStoreChains.size();
+
+ if (NumLdStInMemcpy) {
+ // A memcpy of constants may have been converted to a memset, in which
+ // case we won't have loads and stores, but just stores. In the absence
+ // of loads, there is nothing to glue together.
+ if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) {
+ // If the target does not care, just leave them as they are.
+ for (unsigned i = 0; i < NumLdStInMemcpy; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ OutChains.push_back(OutStoreChains[i]);
+ }
+ } else {
+ // Ld/St less than/equal limit set by target.
+ if (NumLdStInMemcpy <= GluedLdStLimit) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ NumLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ } else {
+ unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit;
+ unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit;
+ unsigned GlueIter = 0;
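+ // e.g. with 10 ld/st pairs and a glue limit of 4, chain the chunks
+ // [6,10) and [2,6) from the back, then the residual [0,2).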
+
+ for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) {
+ unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit;
+ unsigned IndexTo = NumLdStInMemcpy - GlueIter;
+
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo,
+ OutLoadChains, OutStoreChains);
+ GlueIter += GluedLdStLimit;
+ }
+
+ // Residual ld/st.
+ if (RemainingLdStInMemcpy) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ RemainingLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ }
+ }
+ }
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, unsigned Align,
+ bool isVol, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Turn a memmove of undef into a nop.
+ // FIXME: We need to honor volatile even if Src is undef.
+ if (Src.isUndef())
+ return Chain;
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
+ // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in
+ // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the
+ // correct code.
+ bool AllowOverlap = false;
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign,
+ /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
+ AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes()))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ MachineMemOperand::Flags MMOFlags =
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+ uint64_t SrcOff = 0, DstOff = 0;
+ SmallVector<SDValue, 8> LoadValues;
+ SmallVector<SDValue, 8> LoadChains;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value;
+
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
+ Value =
+ DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
+ SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ OutChains.clear();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Store;
+
+ Store = DAG.getStore(Chain, dl, LoadValues[i],
+ DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+/// Lower the call to 'memset' intrinsic function into a series of store
+/// operations.
+///
+/// \param DAG Selection DAG where lowered code is placed.
+/// \param dl Link to corresponding IR location.
+/// \param Chain Control flow dependency.
+/// \param Dst Pointer to destination memory location.
+/// \param Src Value of byte to write into the memory.
+/// \param Size Number of bytes to write.
+/// \param Align Alignment of the destination in bytes.
+/// \param isVol True if destination is volatile.
+/// \param DstPtrInfo IR information on the memory pointer.
+/// \returns New head of the control flow if lowering was successful, an empty
+/// SDValue otherwise.
+///
+/// The function tries to replace the 'llvm.memset' intrinsic with several
+/// store operations and value-calculation code. This is usually profitable
+/// for small memory sizes.
+static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+ // Turn a memset of undef into a nop.
+ // FIXME: We need to honor volatile even if Src is undef.
+ if (Src.isUndef())
+ return Chain;
+
+ // Expand memset to a series of load/store ops if the size operand
+ // falls below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ bool IsZeroVal =
+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
+ (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
+ /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
+ /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes()))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+ unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ DstOff -= VTSize - Size;
+ }
+
+ // If this store is smaller than the largest store see whether we can get
+ // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
+ SDValue Store = DAG.getStore(
+ Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Align,
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
+ OutChains.push_back(Store);
+ DstOff += VT.getSizeInBits() / 8;
+ Size -= VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
+ unsigned AS) {
+ // Lowering memcpy / memset / memmove intrinsics to calls is only valid if
+ // all pointer operands can be losslessly bitcast to pointers of address
+ // space 0.
+ if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
+ report_fatal_error("cannot lower memory intrinsic in address space " +
+ Twine(AS));
+ }
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, unsigned Align,
+ bool isVol, bool AlwaysInline, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),Align,
+ isVol, false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemcpy(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ true, DstPtrInfo, SrcPtrInfo);
+ }
+
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
+ // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
+ // memcpy is not guaranteed to be safe. libc memcpys aren't required to
+ // respect volatile, so they may do things like read or write memory
+ // beyond the given memory regions. But fixing this isn't easy, and most
+ // people don't care.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in SDLoc
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Src, unsigned SrcAlign,
+ SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, unsigned Align,
+ bool isVol, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemmove(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
+ // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
+ // not be safe. See memcpy above for more details.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in SDLoc
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Src, unsigned SrcAlign,
+ SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, unsigned Align,
+ bool isVol, bool isTailCall,
+ MachinePointerInfo DstPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Align, isVol, DstPtrInfo);
+
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemset(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Args.push_back(Entry);
+ Entry.Node = Src;
+ Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Args.push_back(Entry);
+
+ // FIXME: pass in SDLoc
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Value, SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Ty = Type::getInt8Ty(*getContext());
+ Entry.Node = Value;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ SDVTList VTList, ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void* IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
+ EVT MemVT, SDVTList VTs, SDValue Chain,
+ SDValue Ptr, SDValue Cmp, SDValue Swp,
+ MachineMemOperand *MMO) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+ Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO) {
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+ Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_CLR ||
+ Opcode == ISD::ATOMIC_LOAD_OR ||
+ Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND ||
+ Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX ||
+ Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_LOAD_FADD ||
+ Opcode == ISD::ATOMIC_LOAD_FSUB ||
+ Opcode == ISD::ATOMIC_SWAP ||
+ Opcode == ISD::ATOMIC_STORE) &&
+ "Invalid Atomic Op");
+
+ EVT VT = Val.getValueType();
+
+ SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
+ getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Val};
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ MachineMemOperand *MMO) {
+ assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr};
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ SmallVector<EVT, 4> VTs;
+ VTs.reserve(Ops.size());
+ for (unsigned i = 0; i < Ops.size(); ++i)
+ VTs.push_back(Ops[i].getValueType());
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops);
+}
+
+SDValue SelectionDAG::getMemIntrinsicNode(
+ unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
+ EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
+ MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) {
+ if (Align == 0) // Ensure that codegen never sees alignment 0
+ Align = getEVTAlignment(MemVT);
+
+ if (!Size)
+ Size = MemVT.getStoreSize();
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo);
+
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
+}
+
+SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
+ SDVTList VTList,
+ ArrayRef<SDValue> Ops, EVT MemVT,
+ MachineMemOperand *MMO) {
+ assert((Opcode == ISD::INTRINSIC_VOID ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
+ Opcode == ISD::LIFETIME_START ||
+ Opcode == ISD::LIFETIME_END ||
+ ((int)Opcode <= std::numeric_limits<int>::max() &&
+ (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
+ "Opcode is not a memory-accessing opcode!");
+
+ // Memoize the node unless it returns a flag.
+ MemIntrinsicSDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>(
+ Opcode, dl.getIROrder(), VTList, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO);
+ createOperands(N, Ops);
+ }
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
+ SDValue Chain, int FrameIndex,
+ int64_t Size, int64_t Offset) {
+ const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END;
+ const auto VTs = getVTList(MVT::Other);
+ SDValue Ops[2] = {
+ Chain,
+ getFrameIndex(FrameIndex,
+ getTargetLoweringInfo().getFrameIndexTy(getDataLayout()),
+ true)};
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ ID.AddInteger(FrameIndex);
+ ID.AddInteger(Size);
+ ID.AddInteger(Offset);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ LifetimeSDNode *N = newSDNode<LifetimeSDNode>(
+ Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
+ SelectionDAG &DAG, SDValue Ptr,
+ int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return Info;
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI,
+ Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
+ SelectionDAG &DAG, SDValue Ptr,
+ SDValue OffsetOp) {
+ // If the 'Offset' value isn't a constant, we can't handle this.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.isUndef())
+ return InferPointerInfo(Info, DAG, Ptr);
+ return Info;
+}
+
+SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset, EVT MemVT,
+ MachineMemOperand *MMO) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ assert(VT.isVector() == MemVT.isVector() &&
+ "Cannot use an ext load to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+ "Cannot use an ext load to change the number of vector elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ?
+ getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>(
+ dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<LoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ExtType, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ VT, MMO);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo,
+ MemVT, Alignment, MMOFlags, AAInfo);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT,
+ MachineMemOperand *MMO) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+ MemVT, MMO);
+}
+
+SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+ assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
+ // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags =
+ LD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->getAlignment(), MMOFlags,
+ LD->getAAInfo());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(Val.getValueType());
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);
+ return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, false, VT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT, unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(SVT);
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo);
+ return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, EVT SVT,
+ MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (VT == SVT)
+ return getStore(Chain, dl, Val, Ptr, MMO);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() &&
+ "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ST->isTruncatingStore(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue PassThru,
+ EVT MemVT, MachineMemOperand *MMO,
+ ISD::LoadExtType ExtTy, bool isExpanding) {
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
+ dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ExtTy, isExpanding, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ EVT MemVT, MachineMemOperand *MMO,
+ bool IsTruncating, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = { Chain, Val, Ptr, Mask };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
+ dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ IsTruncating, IsCompressing, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 6 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VTs, VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getPassThru().getValueType() == N->getValueType(0) &&
+ "Incompatible type of the PassThru value in MaskedGatherSDNode");
+ assert(N->getMask().getValueType().getVectorNumElements() ==
+ N->getValueType(0).getVectorNumElements() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorNumElements() >=
+ N->getValueType(0).getVectorNumElements() &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 6 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VTs, VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorNumElements() ==
+ N->getValue().getValueType().getVectorNumElements() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorNumElements() >=
+ N->getValue().getValueType().getVectorNumElements() &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
+ // select undef, T, F --> T (if T is a constant), otherwise F
+ // select ?, undef, F --> F
+ // select ?, T, undef --> T
+ if (Cond.isUndef())
+ return isConstantValueOfAnyType(T) ? T : F;
+ if (T.isUndef())
+ return F;
+ if (F.isUndef())
+ return T;
+
+ // select true, T, F --> T
+ // select false, T, F --> F
+ if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
+ return CondC->isNullValue() ? F : T;
+
+ // TODO: This should simplify VSELECT with constant condition using something
+ // like this (but check boolean contents to be complete?):
+ // if (ISD::isBuildVectorAllOnes(Cond.getNode()))
+ // return T;
+ // if (ISD::isBuildVectorAllZeros(Cond.getNode()))
+ // return F;
+
+ // select ?, T, T --> T
+ if (T == F)
+ return T;
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
+ // shift undef, Y --> 0 (can always assume that the undef value is 0)
+ if (X.isUndef())
+ return getConstant(0, SDLoc(X.getNode()), X.getValueType());
+ // shift X, undef --> undef (because it may shift by the bitwidth)
+ if (Y.isUndef())
+ return getUNDEF(X.getValueType());
+
+ // shift 0, Y --> 0
+ // shift X, 0 --> X
+ if (isNullOrNullSplat(X) || isNullOrNullSplat(Y))
+ return X;
+
+ // shift X, C >= bitwidth(X) --> undef
+ // All vector elements must be too big (or undef) to avoid partial undefs.
+ auto isShiftTooBig = [X](ConstantSDNode *Val) {
+ return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits());
+ };
+ if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true))
+ return getUNDEF(X.getValueType());
+
+ return SDValue();
+}
+
+// TODO: Use fast-math-flags to enable more simplifications.
+SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) {
+ ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true);
+ if (!YC)
+ return SDValue();
+
+ // X + -0.0 --> X
+ if (Opcode == ISD::FADD)
+ if (YC->getValueAPF().isNegZero())
+ return X;
+
+ // X - +0.0 --> X
+ if (Opcode == ISD::FSUB)
+ if (YC->getValueAPF().isPosZero())
+ return X;
+
+ // X * 1.0 --> X
+ // X / 1.0 --> X
+ if (Opcode == ISD::FMUL || Opcode == ISD::FDIV)
+ if (YC->getValueAPF().isExactlyValue(1.0))
+ return X;
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue SV, unsigned Align) {
+ SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ ArrayRef<SDUse> Ops) {
+ switch (Ops.size()) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0]));
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ // Copy from an SDUse array into an SDValue array for use with
+ // the regular getNode logic.
+ SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end());
+ return getNode(Opcode, DL, VT, NewOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
+ unsigned NumOps = Ops.size();
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2], Flags);
+ default: break;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::BUILD_VECTOR:
+ // Attempt to simplify BUILD_VECTOR.
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ case ISD::CONCAT_VECTORS:
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
+ break;
+ case ISD::SELECT_CC:
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ break;
+ case ISD::BR_CC:
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ }
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
+ ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) {
+ return getNode(Opcode, DL, getVTList(ResultTys), Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, DL, VTList.VTs[0], Ops);
+
+#if 0
+ switch (Opcode) {
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+ // If the and is only masking out bits that cannot affect the shift,
+ // eliminate the and.
+ unsigned NumBits = VT.getScalarSizeInBits()*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+ }
+#endif
+
+ // Memoize the node unless it returns a flag.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
+ createOperands(N, Ops);
+ }
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTList) {
+ return getNode(Opcode, DL, VTList, None);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1) {
+ SDValue Ops[] = { N1 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2) {
+ SDValue Ops[] = { N1, N2 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDValue Ops[] = { N1, N2, N3 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+ SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT) {
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(2U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(3U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+ ID.AddInteger(VT3.getRawBits());
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(4U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+ ID.AddInteger(VT3.getRawBits());
+ ID.AddInteger(VT4.getRawBits());
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
+ unsigned NumVTs = VTs.size();
+ FoldingSetNodeID ID;
+ ID.AddInteger(NumVTs);
+ for (unsigned index = 0; index < NumVTs; index++) {
+ ID.AddInteger(VTs[index].getRawBits());
+ }
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+ llvm::copy(VTs, Array);
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
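+///
+/// A typical call site (illustrative):
+///   SDNode *Result = DAG.UpdateNodeOperands(N, NewOp0, NewOp1);
+///   // If Result != N, an equivalent node already existed and N was left
+///   // unchanged.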
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = nullptr;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = nullptr;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ updateDivergence(N);
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return N; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = nullptr;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = nullptr;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ updateDivergence(N);
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4, SDValue Op5) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
+ unsigned NumOps = Ops.size();
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // If no operands changed just return the input node.
+ if (std::equal(Ops.begin(), Ops.end(), N->op_begin()))
+ return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = nullptr;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = nullptr;
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (N->OperandList[i] != Ops[i])
+ N->OperandList[i].set(Ops[i]);
+
+ updateDivergence(N);
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+ // Unlike the code in MorphNodeTo that does this, we don't need to
+ // watch for dead nodes here.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ Use.set(SDValue());
+ }
+}
+
+void SelectionDAG::setNodeMemRefs(MachineSDNode *N,
+ ArrayRef<MachineMemOperand *> NewMemRefs) {
+ if (NewMemRefs.empty()) {
+ N->clearMemRefs();
+ return;
+ }
+
+ // Check if we can avoid allocating by storing a single reference directly.
+ if (NewMemRefs.size() == 1) {
+ N->MemRefs = NewMemRefs[0];
+ N->NumMemRefs = 1;
+ return;
+ }
+
+ MachineMemOperand **MemRefsBuffer =
+ Allocator.template Allocate<MachineMemOperand *>(NewMemRefs.size());
+ llvm::copy(NewMemRefs, MemRefsBuffer);
+ N->MemRefs = MemRefsBuffer;
+ N->NumMemRefs = static_cast<int>(NewMemRefs.size());
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
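+/// An illustrative call from a target's instruction selector (the machine
+/// opcode here is a placeholder):
+///   CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));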
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, None);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, None);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ SDVTList VTs, ArrayRef<SDValue> Ops) {
+ SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
+ // Reset the NodeID to -1.
+ New->setNodeId(-1);
+ if (New != N) {
+ ReplaceAllUsesWith(N, New);
+ RemoveDeadNode(N);
+ }
+ return New;
+}
+
+/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the fact that an operation is associated with multiple lines.
+/// This will make the debugger work better at -O0, where there is a higher
+/// probability of having other instructions associated with that line.
+///
+/// For IROrder, we keep the smaller of the two.
+SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
+ N->setDebugLoc(DebugLoc());
+ }
+ unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
+ N->setIROrder(Order);
+ return N;
+}
+
+/// MorphNodeTo - This *mutates* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the SDLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG.
+/// As a consequence, it isn't appropriate to use from within the DAG combiner
+/// or the legalizer, which maintain worklists that would need to be updated
+/// when deleting things.
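+///
+/// Illustrative use (the opcode and types are placeholders; machine opcodes
+/// are encoded with a leading complement, as in SelectNodeTo below):
+///   SDVTList VTs = getVTList(MVT::i32, MVT::Other);
+///   SDValue Ops[] = {N->getOperand(0), N->getOperand(1)};
+///   SDNode *New = MorphNodeTo(N, ~TargetOpcode::COPY, VTs, Ops);
+///   // 'New' may be a pre-existing equivalent node rather than N itself.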
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ SDVTList VTs, ArrayRef<SDValue> Ops) {
+ // If an identical node already exists, use it.
+ void *IP = nullptr;
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, VTs, Ops);
+ if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP))
+ return UpdateSDLocOnMergeSDNode(ON, SDLoc(N));
+ }
+
+ if (!RemoveNodeFromCSEMaps(N))
+ IP = nullptr;
+
+ // Start the morphing.
+ N->NodeType = Opc;
+ N->ValueList = VTs.VTs;
+ N->NumValues = VTs.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list. Keep track of any operands that become dead as a result.
+ SmallPtrSet<SDNode*, 16> DeadNodeSet;
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Used = Use.getNode();
+ Use.set(SDValue());
+ if (Used->use_empty())
+ DeadNodeSet.insert(Used);
+ }
+
+ // For MachineNode, initialize the memory references information.
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N))
+ MN->clearMemRefs();
+
+ // Swap for an appropriately sized array from the recycler.
+ removeOperands(N);
+ createOperands(N, Ops);
+
+ // Delete any nodes that are still dead after adding the uses for the
+ // new operands.
+ if (!DeadNodeSet.empty()) {
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SDNode *N : DeadNodeSet)
+ if (N->use_empty())
+ DeadNodes.push_back(N);
+ RemoveDeadNodes(DeadNodes);
+ }
+
+ if (IP)
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
+ unsigned OrigOpc = Node->getOpcode();
+ unsigned NewOpc;
+ switch (OrigOpc) {
+ default:
+ llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
+ case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
+ case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
+ case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
+ case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
+ case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+ case ISD::STRICT_FMA: NewOpc = ISD::FMA; break;
+ case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break;
+ case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
+ case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break;
+ case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break;
+ case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break;
+ case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break;
+ case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break;
+ case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break;
+ case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break;
+ case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break;
+ case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break;
+ case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break;
+ case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
+ case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
+ case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
+ case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break;
+ case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break;
+ case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break;
+ case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break;
+ case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break;
+ case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break;
+ case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
+ case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break;
+ case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break;
+ case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break;
+ }
+
+ assert(Node->getNumValues() == 2 && "Unexpected number of results!");
+
+ // We're taking this node out of the chain, so we need to re-link things.
+ SDValue InputChain = Node->getOperand(0);
+ SDValue OutputChain = SDValue(Node, 1);
+ ReplaceAllUsesOfValueWith(OutputChain, InputChain);
+
+ SmallVector<SDValue, 3> Ops;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(Node->getOperand(i));
+
+ SDVTList VTs = getVTList(Node->getValueType(0));
+ SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ } else {
+ ReplaceAllUsesWith(Node, Res);
+ RemoveDeadNode(Node);
+ }
+
+ return Res;
+}
+
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
+///
+/// Note that getMachineNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
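+///
+/// Illustrative call from within a target's selector (the opcode is a
+/// placeholder):
+///   MachineSDNode *MN =
+///       CurDAG->getMachineNode(TargetOpcode::COPY, DL, VT, SrcOperand);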
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, None);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ ArrayRef<EVT> ResultTys,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(ResultTys);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTs,
+ ArrayRef<SDValue> Ops) {
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
+ MachineSDNode *N;
+ void *IP = nullptr;
+
+ if (DoCSE) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ~Opcode, VTs, Ops);
+ IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL));
+ }
+ }
+
+ // Allocate a new MachineSDNode.
+ N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+
+ if (DoCSE)
+ CSEMap.InsertNode(N, IP);
+
+ InsertNode(N);
+ NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this);
+ return N;
+}
+
+/// getTargetExtractSubreg - A convenience function for creating
+/// TargetOpcode::EXTRACT_SUBREG nodes.
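+/// For example (illustrative; subregister indices are target-specific):
+///   SDValue Lo32 =
+///       DAG.getTargetExtractSubreg(X86::sub_32bit, DL, MVT::i32, Val64);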
+SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT,
+ SDValue Operand) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
+ SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ VT, Operand, SRIdxVal);
+ return SDValue(Subreg, 0);
+}
+
+/// getTargetInsertSubreg - A convenience function for creating
+/// TargetOpcode::INSERT_SUBREG nodes.
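+/// For example (illustrative; subregister indices are target-specific):
+///   SDValue Merged = DAG.getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64,
+///                                              Val64, Val32);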
+SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
+ SDValue Operand, SDValue Subreg) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
+ SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ VT, Operand, Subreg, SRIdxVal);
+ return SDValue(Result, 0);
+}
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
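+///
+/// Illustrative query that reuses an existing node instead of creating one:
+///   SDValue Ops[] = {A, B};
+///   if (SDNode *E = DAG.getNodeIfExists(ISD::ADD, DAG.getVTList(VT), Ops))
+///     return SDValue(E, 0);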
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops,
+ const SDNodeFlags Flags) {
+ if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) {
+ E->intersectFlagsWith(Flags);
+ return E;
+ }
+ }
+ return nullptr;
+}
+
+/// getDbgValue - Creates an SDDbgValue node.
+///
+/// SDNode
+SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr,
+ SDNode *N, unsigned R, bool IsIndirect,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, N, R, IsIndirect, DL, O);
+}
+
+/// Constant
+SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,
+ DIExpression *Expr,
+ const Value *C,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, DL, O);
+}
+
+/// FrameIndex
+SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
+ DIExpression *Expr, unsigned FI,
+ bool IsIndirect,
+ const DebugLoc &DL,
+ unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX);
+}
+
+/// VReg
+SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var,
+ DIExpression *Expr,
+ unsigned VReg, bool IsIndirect,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG);
+}
+
+void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
+ unsigned OffsetInBits, unsigned SizeInBits,
+ bool InvalidateDbg) {
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ assert(FromNode && ToNode && "Can't modify dbg values");
+
+ // PR35338
+ // TODO: assert(From != To && "Redundant dbg value transfer");
+ // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer");
+ if (From == To || FromNode == ToNode)
+ return;
+
+ if (!FromNode->getHasDebugValue())
+ return;
+
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (SDDbgValue *Dbg : GetDbgValues(FromNode)) {
+ if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
+ continue;
+
+ // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value");
+
+ // Just transfer the dbg value attached to From.
+ if (Dbg->getResNo() != From.getResNo())
+ continue;
+
+ DIVariable *Var = Dbg->getVariable();
+ auto *Expr = Dbg->getExpression();
+ // If a fragment is requested, update the expression.
+ if (SizeInBits) {
+ // When splitting a larger (e.g., sign-extended) value whose
+ // lower bits are described with an SDDbgValue, do not attempt
+ // to transfer the SDDbgValue to the upper bits.
+ if (auto FI = Expr->getFragmentInfo())
+ if (OffsetInBits + SizeInBits > FI->SizeInBits)
+ continue;
+ auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits,
+ SizeInBits);
+ if (!Fragment)
+ continue;
+ Expr = *Fragment;
+ }
+ // Clone the SDDbgValue and move it to To.
+ SDDbgValue *Clone =
+ getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(),
+ Dbg->getDebugLoc(), Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+
+ if (InvalidateDbg) {
+ // Invalidate value and indicate the SDDbgValue should not be emitted.
+ Dbg->setIsInvalidated();
+ Dbg->setIsEmitted();
+ }
+ }
+
+ for (SDDbgValue *Dbg : ClonedDVs)
+ AddDbgValue(Dbg, ToNode, false);
+}
+
+void SelectionDAG::salvageDebugInfo(SDNode &N) {
+ if (!N.getHasDebugValue())
+ return;
+
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (auto DV : GetDbgValues(&N)) {
+ if (DV->isInvalidated())
+ continue;
+ switch (N.getOpcode()) {
+ default:
+ break;
+ case ISD::ADD:
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ if (!isConstantIntBuildVectorOrConstantInt(N0) &&
+ isConstantIntBuildVectorOrConstantInt(N1)) {
+ uint64_t Offset = N.getConstantOperandVal(1);
+ // Rewrite an ADD constant node into a DIExpression. Since we are
+ // performing arithmetic to compute the variable's *value* in the
+ // DIExpression, we need to mark the expression with a
+ // DW_OP_stack_value.
+ auto *DIExpr = DV->getExpression();
+ DIExpr =
+ DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset);
+ SDDbgValue *Clone =
+ getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
+ DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
+ ClonedDVs.push_back(Clone);
+ DV->setIsInvalidated();
+ DV->setIsEmitted();
+ LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting";
+ N0.getNode()->dumprFull(this);
+ dbgs() << " into " << *DIExpr << '\n');
+ }
+ }
+ }
+
+ for (SDDbgValue *Dbg : ClonedDVs)
+ AddDbgValue(Dbg, Dbg->getSDNode(), false);
+}
+
+/// Creates an SDDbgLabel node.
+SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O);
+}
+
+namespace {
+
+/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
+/// pointed to by a use iterator is deleted, increment the use iterator
+/// so that it doesn't dangle.
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SDNode::use_iterator &UI;
+ SDNode::use_iterator &UE;
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ // Increment the iterator as needed.
+ while (UI != UE && N == *UI)
+ ++UI;
+ }
+
+public:
+ RAUWUpdateListener(SelectionDAG &d,
+ SDNode::use_iterator &ui,
+ SDNode::use_iterator &ue)
+ : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
+};
+
+} // end anonymous namespace
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
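+/// Typical use when a node has been folded away (illustrative):
+///   DAG.ReplaceAllUsesWith(SDValue(N, 0), FoldedValue);
+///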
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
+ SDNode *From = FromN.getNode();
+ assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+ "Cannot replace with this method!");
+ assert(From != To.getNode() && "Cannot replace uses of with self");
+
+ // Preserve Debug Values
+ transferDbgValues(FromN, To);
+
+ // Iterate over all the existing uses of From. New uses will be added
+ // to the beginning of the use list, which we avoid visiting.
+ // This specifically avoids visiting uses of From that arise while the
+ // replacement is happening, because any such uses would be the result
+ // of CSE: If an existing node looks like From after one of its operands
+ // is replaced by To, we don't want to replace all of its users with To
+ // too. See PR3018 for more info.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
+ } while (UI != UE && *UI == User);
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (FromN == getRoot())
+ setRoot(To);
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ assert((!From->hasAnyUseOfValue(i) ||
+ From->getValueType(i) == To->getValueType(i)) &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+ // Handle the trivial case.
+ if (From == To)
+ return;
+
+ // Preserve Debug Info. Only do this if there's a use.
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ if (From->hasAnyUseOfValue(i)) {
+ assert((i < To->getNumValues()) && "Invalid To location");
+ transferDbgValues(SDValue(From, i), SDValue(To, i));
+ }
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.setNode(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To, getRoot().getResNo()));
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
+ if (From->getNumValues() == 1) // Handle the simple case efficiently.
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
+
+ // Preserve Debug Info.
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ transferDbgValues(SDValue(From, i), To[i]);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this happens the
+ // uses are usually next to each other in the list. To help reduce the
+ // number of CSE and divergence recomputations, process all the uses of this
+ // user that we can find this way.
+ bool To_IsDivergent = false;
+ do {
+ SDUse &Use = UI.getUse();
+ const SDValue &ToOp = To[Use.getResNo()];
+ ++UI;
+ Use.set(ToOp);
+ To_IsDivergent |= ToOp->isDivergent();
+ } while (UI != UE && *UI == User);
+
+ if (To_IsDivergent != From->isDivergent())
+ updateDivergence(User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To[getRoot().getResNo()]));
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The Deleted
+/// vector is handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
+ // Handle the really simple, really trivial case efficiently.
+ if (From == To) return;
+
+ // Handle the simple, trivial, case efficiently.
+ if (From.getNode()->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To);
+ return;
+ }
+
+ // Preserve Debug Info.
+ transferDbgValues(From, To);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From.getNode()->use_begin(),
+ UE = From.getNode()->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+ bool UserRemovedFromCSEMaps = false;
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+
+ // Skip uses of different values from the same node.
+ if (Use.getResNo() != From.getResNo()) {
+ ++UI;
+ continue;
+ }
+
+ // If this node hasn't been modified yet, it's still in the CSE maps,
+ // so remove its old self from the CSE maps.
+ if (!UserRemovedFromCSEMaps) {
+ RemoveNodeFromCSEMaps(User);
+ UserRemovedFromCSEMaps = true;
+ }
+
+ ++UI;
+ Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
+ } while (UI != UE && *UI == User);
+ // We are iterating over all uses of the From node, so if a use
+ // doesn't use the specific value, no changes are made.
+ if (!UserRemovedFromCSEMaps)
+ continue;
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot())
+ setRoot(To);
+}
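+
+// Example usage (a sketch with hypothetical names): redirect only the chain
+// result of a load, leaving its data result untouched.
+//
+//   SDValue OldChain(LoadNode, 1); // result 1 of a load is its chain
+//   DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);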
+
+namespace {
+
+ /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+ /// to record information about a use.
+ struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+ };
+
+ /// operator< - Sort Memos by User.
+ bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+ }
+
+} // end anonymous namespace
+
+void SelectionDAG::updateDivergence(SDNode *N) {
+ if (TLI->isSDNodeAlwaysUniform(N))
+ return;
+ bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+ for (auto &Op : N->ops()) {
+ if (Op.Val.getValueType() != MVT::Other)
+ IsDivergent |= Op.getNode()->isDivergent();
+ }
+ if (N->SDNodeBits.IsDivergent != IsDivergent) {
+ N->SDNodeBits.IsDivergent = IsDivergent;
+ for (auto U : N->uses()) {
+ updateDivergence(U);
+ }
+ }
+}
+
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
+ DenseMap<SDNode *, unsigned> Degree;
+ Order.reserve(AllNodes.size());
+ for (auto &N : allnodes()) {
+ unsigned NOps = N.getNumOperands();
+ Degree[&N] = NOps;
+ if (0 == NOps)
+ Order.push_back(&N);
+ }
+ for (size_t I = 0; I != Order.size(); ++I) {
+ SDNode *N = Order[I];
+ for (auto U : N->uses()) {
+ unsigned &UnsortedOps = Degree[U];
+ if (0 == --UnsortedOps)
+ Order.push_back(U);
+ }
+ }
+}
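+
+// CreateTopologicalOrder is a Kahn-style worklist sort: zero-operand nodes
+// seed the order, and a node is appended once all of its operands have been
+// emitted. A minimal sketch of consuming the result (hypothetical use):
+//
+//   std::vector<SDNode *> Order;
+//   DAG.CreateTopologicalOrder(Order);
+//   for (SDNode *N : Order) {
+//     // All operands of N appear earlier in Order.
+//   }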
+
+#ifndef NDEBUG
+void SelectionDAG::VerifyDAGDiverence() {
+ std::vector<SDNode *> TopoOrder;
+ CreateTopologicalOrder(TopoOrder);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ DenseMap<const SDNode *, bool> DivergenceMap;
+ for (auto &N : allnodes()) {
+ DivergenceMap[&N] = false;
+ }
+ for (auto N : TopoOrder) {
+ bool IsDivergent = DivergenceMap[N];
+ bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA);
+ for (auto &Op : N->ops()) {
+ if (Op.Val.getValueType() != MVT::Other)
+ IsSDNodeDivergent |= DivergenceMap[Op.getNode()];
+ }
+ if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) {
+ DivergenceMap[N] = true;
+ }
+ }
+ for (auto &N : allnodes()) {
+ (void)N;
+ assert(DivergenceMap[&N] == N.isDivergent() &&
+ "Divergence bit inconsistency detected\n");
+ }
+}
+#endif
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+                                              const SDValue *To,
+                                              unsigned Num) {
+ // Handle the simple, trivial case efficiently.
+ if (Num == 1)
+ return ReplaceAllUsesOfValueWith(*From, *To);
+
+ transferDbgValues(*From, *To);
+
+  // Record all of the existing uses up front, so that uses introduced
+  // during the replacement process itself are not re-processed.
+ SmallVector<UseMemo, 4> Uses;
+ for (unsigned i = 0; i != Num; ++i) {
+ unsigned FromResNo = From[i].getResNo();
+ SDNode *FromNode = From[i].getNode();
+ for (SDNode::use_iterator UI = FromNode->use_begin(),
+ E = FromNode->use_end(); UI != E; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == FromResNo) {
+ UseMemo Memo = { *UI, i, &Use };
+ Uses.push_back(Memo);
+ }
+ }
+ }
+
+ // Sort the uses, so that all the uses from a given User are together.
+ llvm::sort(Uses);
+
+ for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+ UseIndex != UseIndexEnd; ) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Uses[UseIndex].User;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // The Uses array is sorted, so all the uses for a given User
+ // are next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ unsigned i = Uses[UseIndex].Index;
+ SDUse &Use = *Uses[UseIndex].Use;
+ ++UseIndex;
+
+ Use.set(To[i]);
+ } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the maximum id.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+ unsigned DAGSize = 0;
+
+ // SortedPos tracks the progress of the algorithm. Nodes before it are
+ // sorted, nodes after it are unsorted. When the algorithm completes
+ // it is at the end of the list.
+ allnodes_iterator SortedPos = allnodes_begin();
+
+ // Visit all the nodes. Move nodes with no operands to the front of
+ // the list immediately. Annotate nodes that do have operands with their
+ // operand count. Before we do this, the Node Id fields of the nodes
+ // may contain arbitrary values. After, the Node Id fields for nodes
+ // before SortedPos will contain the topological sort index, and the
+  // Node Id fields for nodes at SortedPos and after will contain the
+ // count of outstanding operands.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E;) {
+ SDNode *N = &*I++;
+ checkForCycles(N, this);
+ unsigned Degree = N->getNumOperands();
+ if (Degree == 0) {
+      // A node with no operands, add it to the result array immediately.
+ N->setNodeId(DAGSize++);
+ allnodes_iterator Q(N);
+ if (Q != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Temporarily use the Node Id as scratch space for the degree count.
+ N->setNodeId(Degree);
+ }
+ }
+
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
+ // such that by the time the end is reached all nodes will be sorted.
+ for (SDNode &Node : allnodes()) {
+ SDNode *N = &Node;
+ checkForCycles(N, this);
+ // N is in sorted position, so all its uses have one less operand
+ // that needs to be sorted.
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *P = *UI;
+ unsigned Degree = P->getNodeId();
+ assert(Degree != 0 && "Invalid node degree");
+ --Degree;
+ if (Degree == 0) {
+        // All of P's operands are sorted, so P may be sorted now.
+ P->setNodeId(DAGSize++);
+ if (P->getIterator() != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Update P's outstanding operand count.
+ P->setNodeId(Degree);
+ }
+ }
+ if (Node.getIterator() == SortedPos) {
+#ifndef NDEBUG
+ allnodes_iterator I(N);
+ SDNode *S = &*++I;
+ dbgs() << "Overran sorted position:\n";
+ S->dumprFull(this); dbgs() << "\n";
+ dbgs() << "Checking if this is due to cycles\n";
+ checkForCycles(this, true);
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+
+ assert(SortedPos == AllNodes.end() &&
+ "Topological sort incomplete!");
+ assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token!");
+ assert(AllNodes.front().getNodeId() == 0 &&
+ "First node in topological sort has non-zero id!");
+ assert(AllNodes.front().getNumOperands() == 0 &&
+ "First node in topological sort has operands!");
+  assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+         "Last node in topological sort has unexpected id!");
+  assert(AllNodes.back().use_empty() &&
+         "Last node in topological sort has users!");
+ assert(DAGSize == allnodes_size() && "Node count mismatch!");
+ return DAGSize;
+}
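+
+// Sketch of a typical use (hypothetical): after this call, walking allnodes()
+// visits operands before users, and getNodeId() gives each node's position.
+//
+//   unsigned MaxId = DAG.AssignTopologicalOrder();
+//   for (SDNode &N : DAG.allnodes())
+//     assert(N.getNodeId() >= 0 && N.getNodeId() < (int)MaxId);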
+
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+ if (SD) {
+ assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue());
+ SD->setHasDebugValue(true);
+ }
+ DbgInfo->add(DB, SD, isParameter);
+}
+
+void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) {
+ DbgInfo->add(DB);
+}
+
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+ SDValue NewMemOp) {
+ assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
+ // The new memory operation must have the same position as the old load in
+ // terms of memory dependency. Create a TokenFactor for the old load and new
+ // memory operation and update uses of the old load's output chain to use that
+ // TokenFactor.
+ SDValue OldChain = SDValue(OldLoad, 1);
+ SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
+ if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1))
+ return NewChain;
+
+ SDValue TokenFactor =
+ getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain);
+ ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
+ UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
+ return TokenFactor;
+}
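+
+// Example (a sketch; OldLoad and NewLoad are hypothetical): after rewriting
+// OldLoad into NewLoad, splice the new chain into the old load's position so
+// that dependent memory operations keep their ordering.
+//
+//   SDValue Chain =
+//       DAG.makeEquivalentMemoryOrdering(OldLoad, SDValue(NewLoad, 0));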
+
+SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
+ Function **OutFunction) {
+ assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol");
+
+ auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ auto *Module = MF->getFunction().getParent();
+ auto *Function = Module->getFunction(Symbol);
+
+ if (OutFunction != nullptr)
+ *OutFunction = Function;
+
+ if (Function != nullptr) {
+ auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace());
+ return getGlobalAddress(Function, SDLoc(Op), PtrTy);
+ }
+
+ std::string ErrorStr;
+ raw_string_ostream ErrorFormatter(ErrorStr);
+
+ ErrorFormatter << "Undefined external symbol ";
+ ErrorFormatter << '"' << Symbol << '"';
+ ErrorFormatter.flush();
+
+ report_fatal_error(ErrorStr);
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+bool llvm::isNullConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isNullValue();
+}
+
+bool llvm::isNullFPConstant(SDValue V) {
+ ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+ return Const != nullptr && Const->isZero() && !Const->isNegative();
+}
+
+bool llvm::isAllOnesConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isAllOnesValue();
+}
+
+bool llvm::isOneConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isOne();
+}
+
+SDValue llvm::peekThroughBitcasts(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ return V;
+}
+
+SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST && V.getOperand(0).hasOneUse())
+ V = V.getOperand(0);
+ return V;
+}
+
+SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
+ while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ V = V.getOperand(0);
+ return V;
+}
+
+bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
+ if (V.getOpcode() != ISD::XOR)
+ return false;
+ V = peekThroughBitcasts(V.getOperand(1));
+ unsigned NumBits = V.getScalarValueSizeInBits();
+ ConstantSDNode *C =
+ isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true);
+ return C && (C->getAPIntValue().countTrailingOnes() >= NumBits);
+}
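+
+// Sketch (hypothetical values): isBitwiseNot matches the canonical NOT
+// pattern, an XOR against all-ones, including all-ones vector splats.
+//
+//   SDValue NotX = DAG.getNode(ISD::XOR, DL, VT, X,
+//                              DAG.getAllOnesConstant(DL, VT));
+//   bool IsNot = llvm::isBitwiseNot(NotX); // true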
+
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
+ bool AllowTruncation) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
+
+ // BuildVectors can truncate their operands. Ignore that case here unless
+ // AllowTruncation is set.
+ if (CN && (UndefElements.none() || AllowUndefs)) {
+ EVT CVT = CN->getValueType(0);
+ EVT NSVT = N.getValueType().getScalarType();
+ assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+ if (AllowTruncation || (CVT == NSVT))
+ return CN;
+ }
+ }
+
+ return nullptr;
+}
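+
+// Sketch (hypothetical): both a scalar constant and a uniform BUILD_VECTOR
+// splat of that constant are recognized.
+//
+//   SDValue C = DAG.getConstant(7, DL, MVT::i32);
+//   isConstOrConstSplat(C);                                        // non-null
+//   isConstOrConstSplat(DAG.getSplatBuildVector(MVT::v4i32, DL, C)); // non-null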
+
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
+ bool AllowUndefs,
+ bool AllowTruncation) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements);
+
+ // BuildVectors can truncate their operands. Ignore that case here unless
+ // AllowTruncation is set.
+ if (CN && (UndefElements.none() || AllowUndefs)) {
+ EVT CVT = CN->getValueType(0);
+ EVT NSVT = N.getValueType().getScalarType();
+ assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+ if (AllowTruncation || (CVT == NSVT))
+ return CN;
+ }
+ }
+
+ return nullptr;
+}
+
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+ if (CN && (UndefElements.none() || AllowUndefs))
+ return CN;
+ }
+
+ return nullptr;
+}
+
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
+ const APInt &DemandedElts,
+ bool AllowUndefs) {
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantFPSDNode *CN =
+ BV->getConstantFPSplatNode(DemandedElts, &UndefElements);
+ if (CN && (UndefElements.none() || AllowUndefs))
+ return CN;
+ }
+
+ return nullptr;
+}
+
+bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
+ // TODO: may want to use peekThroughBitcast() here.
+ ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
+ return C && C->isNullValue();
+}
+
+bool llvm::isOneOrOneSplat(SDValue N) {
+ // TODO: may want to use peekThroughBitcast() here.
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth;
+}
+
+bool llvm::isAllOnesOrAllOnesSplat(SDValue N) {
+ N = peekThroughBitcasts(N);
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
+}
+
+HandleSDNode::~HandleSDNode() {
+ DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
+ const DebugLoc &DL,
+ const GlobalValue *GA, EVT VT,
+ int64_t o, unsigned TF)
+ : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+ TheGlobal = GA;
+}
+
+AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
+ EVT VT, unsigned SrcAS,
+ unsigned DestAS)
+ : SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)),
+ SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
+
+MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
+ : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+ MemSDNodeBits.IsVolatile = MMO->isVolatile();
+ MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal();
+ MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable();
+ MemSDNodeBits.IsInvariant = MMO->isInvariant();
+
+ // We check here that the size of the memory operand fits within the size of
+ // the MMO. This is because the MMO might indicate only a possible address
+ // range instead of specifying the affected memory addresses precisely.
+ assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+ AddNodeIDNode(ID, this);
+}
+
+namespace {
+
+ struct EVTArray {
+ std::vector<EVT> VTs;
+
+ EVTArray() {
+ VTs.reserve(MVT::LAST_VALUETYPE);
+ for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+ VTs.push_back(MVT((MVT::SimpleValueType)i));
+ }
+ };
+
+} // end anonymous namespace
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs;
+static ManagedStatic<EVTArray> SimpleVTArray;
+static ManagedStatic<sys::SmartMutex<true>> VTMutex;
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const EVT *SDNode::getValueTypeList(EVT VT) {
+ if (VT.isExtended()) {
+ sys::SmartScopedLock<true> Lock(*VTMutex);
+ return &(*EVTs->insert(VT).first);
+ } else {
+ assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ "Value type out of range!");
+ return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+ }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // TODO: Only iterate over uses of a given value of the node
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ if (UI.getUse().getResNo() == Value) {
+ if (NUses == 0)
+ return false;
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
+
+/// hasAnyUseOfValue - Return true if there are any uses of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+ if (UI.getUse().getResNo() == Value)
+ return true;
+
+ return false;
+}
+
+/// isOnlyUserOf - Return true if this node is the only user of N.
+bool SDNode::isOnlyUserOf(const SDNode *N) const {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// Return true if the only users of N are contained in Nodes.
+bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (llvm::any_of(Nodes,
+ [&User](const SDNode *Node) { return User == Node; }))
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperand - Return true if this node is an operand of N.
+bool SDValue::isOperandOf(const SDNode *N) const {
+ return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; });
+}
+
+bool SDNode::isOperandOf(const SDNode *N) const {
+ return any_of(N->op_values(),
+ [this](SDValue Op) { return this == Op.getNode(); });
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in; it does not do an exhaustive search.
+///
+/// Note that we only need to examine chains when we're searching for
+/// side-effects; SelectionDAG requires that all side-effects are represented
+/// by chains, even if another operand would force a specific ordering. This
+/// constraint is necessary to allow transformations like splitting loads.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+ unsigned Depth) const {
+ if (*this == Dest) return true;
+
+  // Don't search too deeply; we just want to be able to see through
+  // TokenFactors, etc.
+ if (Depth == 0) return false;
+
+ // If this is a token factor, all inputs to the TF happen in parallel.
+ if (getOpcode() == ISD::TokenFactor) {
+ // First, try a shallow search.
+ if (is_contained((*this)->ops(), Dest)) {
+ // We found the chain we want as an operand of this TokenFactor.
+ // Essentially, we reach the chain without side-effects if we could
+ // serialize the TokenFactor into a simple chain of operations with
+ // Dest as the last operation. This is automatically true if the
+ // chain has one use: there are no other ordering constraints.
+ // If the chain has more than one use, we give up: some other
+ // use of Dest might force a side-effect between Dest and the current
+ // node.
+ if (Dest.hasOneUse())
+ return true;
+ }
+ // Next, try a deep search: check whether every operand of the TokenFactor
+ // reaches Dest.
+ return llvm::all_of((*this)->ops(), [=](SDValue Op) {
+ return Op.reachesChainWithoutSideEffects(Dest, Depth - 1);
+ });
+ }
+
+  // Loads don't have side effects; look through them.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+ if (Ld->isUnordered())
+ return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+ }
+ return false;
+}
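+
+// Illustrative query (hypothetical Load): a load's chain reaches the entry
+// token without side effects when only TokenFactors and unordered loads sit
+// between them.
+//
+//   bool Safe =
+//       Load->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode());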
+
+bool SDNode::hasPredecessor(const SDNode *N) const {
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(this);
+ return hasPredecessorHelper(N, Visited, Worklist);
+}
+
+void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
+ this->Flags.intersectWith(Flags);
+}
+
+SDValue
+SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
+ ArrayRef<ISD::NodeType> CandidateBinOps,
+ bool AllowPartials) {
+ // The pattern must end in an extract from index 0.
+ if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isNullConstant(Extract->getOperand(1)))
+ return SDValue();
+
+ // Match against one of the candidate binary ops.
+ SDValue Op = Extract->getOperand(0);
+ if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
+ return Op.getOpcode() == unsigned(BinOp);
+ }))
+ return SDValue();
+
+ // Floating-point reductions may require relaxed constraints on the final step
+ // of the reduction because they may reorder intermediate operations.
+ unsigned CandidateBinOp = Op.getOpcode();
+ if (Op.getValueType().isFloatingPoint()) {
+ SDNodeFlags Flags = Op->getFlags();
+ switch (CandidateBinOp) {
+ case ISD::FADD:
+ if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation())
+ return SDValue();
+ break;
+ default:
+ llvm_unreachable("Unhandled FP opcode for binop reduction");
+ }
+ }
+
+ // Matching failed - attempt to see if we did enough stages that a partial
+ // reduction from a subvector is possible.
+ auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) {
+ if (!AllowPartials || !Op)
+ return SDValue();
+ EVT OpVT = Op.getValueType();
+ EVT OpSVT = OpVT.getScalarType();
+ EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts);
+ if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0))
+ return SDValue();
+ BinOp = (ISD::NodeType)CandidateBinOp;
+ return getNode(
+ ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op,
+ getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout())));
+ };
+
+ // At each stage, we're looking for something that looks like:
+ // %s = shufflevector <8 x i32> %op, <8 x i32> undef,
+ // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
+ // i32 undef, i32 undef, i32 undef, i32 undef>
+ // %a = binop <8 x i32> %op, %s
+ // Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
+ // we expect something like:
+ // <4,5,6,7,u,u,u,u>
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
+ // While a partial reduction match would be:
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
+ unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
+ SDValue PrevOp;
+ for (unsigned i = 0; i < Stages; ++i) {
+ unsigned MaskEnd = (1 << i);
+
+ if (Op.getOpcode() != CandidateBinOp)
+ return PartialReduction(PrevOp, MaskEnd);
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op0);
+ if (Shuffle) {
+ Op = Op1;
+ } else {
+ Shuffle = dyn_cast<ShuffleVectorSDNode>(Op1);
+ Op = Op0;
+ }
+
+ // The first operand of the shuffle should be the same as the other operand
+ // of the binop.
+ if (!Shuffle || Shuffle->getOperand(0) != Op)
+ return PartialReduction(PrevOp, MaskEnd);
+
+ // Verify the shuffle has the expected (at this stage of the pyramid) mask.
+ for (int Index = 0; Index < (int)MaskEnd; ++Index)
+ if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index))
+ return PartialReduction(PrevOp, MaskEnd);
+
+ PrevOp = Op;
+ }
+
+ BinOp = (ISD::NodeType)CandidateBinOp;
+ return Op;
+}
+
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ unsigned i;
+  for (i = 0; i != NE; ++i) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] =
+ getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
+ getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout())));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (N->getOpcode()) {
+ default: {
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands,
+ N->getFlags()));
+ break;
+ }
+ case ISD::VSELECT:
+ Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ Operands[0],
+ getValueType(ExtVT)));
+ }
+ }
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(getUNDEF(EltVT));
+
+ EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE);
+ return getBuildVector(VecVT, dl, Scalars);
+}
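+
+// Sketch (hypothetical VecAdd with type v4i32): unrolling produces four
+// scalar ADDs on extracted elements, recombined with a BUILD_VECTOR.
+//
+//   SDValue Scalarized = DAG.UnrollVectorOp(VecAdd.getNode());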
+
+std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp(
+ SDNode *N, unsigned ResNE) {
+ unsigned Opcode = N->getOpcode();
+ assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO ||
+ Opcode == ISD::USUBO || Opcode == ISD::SSUBO ||
+ Opcode == ISD::UMULO || Opcode == ISD::SMULO) &&
+ "Expected an overflow opcode");
+
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT ResEltVT = ResVT.getVectorElementType();
+ EVT OvEltVT = OvVT.getVectorElementType();
+ SDLoc dl(N);
+
+ // If ResNE is 0, fully unroll the vector op.
+ unsigned NE = ResVT.getVectorNumElements();
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ SmallVector<SDValue, 8> LHSScalars;
+ SmallVector<SDValue, 8> RHSScalars;
+ ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE);
+ ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE);
+
+ EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT);
+ SDVTList VTs = getVTList(ResEltVT, SVT);
+ SmallVector<SDValue, 8> ResScalars;
+ SmallVector<SDValue, 8> OvScalars;
+ for (unsigned i = 0; i < NE; ++i) {
+ SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]);
+ SDValue Ov =
+ getSelect(dl, OvEltVT, Res.getValue(1),
+ getBoolConstant(true, dl, OvEltVT, ResVT),
+ getConstant(0, dl, OvEltVT));
+
+ ResScalars.push_back(Res);
+ OvScalars.push_back(Ov);
+ }
+
+ ResScalars.append(ResNE - NE, getUNDEF(ResEltVT));
+ OvScalars.append(ResNE - NE, getUNDEF(OvEltVT));
+
+ EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE);
+ EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE);
+ return std::make_pair(getBuildVector(NewResVT, dl, ResScalars),
+ getBuildVector(NewOvVT, dl, OvScalars));
+}
+
+bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
+ LoadSDNode *Base,
+ unsigned Bytes,
+ int Dist) const {
+ if (LD->isVolatile() || Base->isVolatile())
+ return false;
+ // TODO: probably too restrictive for atomics, revisit
+ if (!LD->isSimple())
+ return false;
+ if (LD->isIndexed() || Base->isIndexed())
+ return false;
+ if (LD->getChain() != Base->getChain())
+ return false;
+ EVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
+ auto LocDecomp = BaseIndexOffset::match(LD, *this);
+
+ int64_t Offset = 0;
+ if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
+ return (Dist * Bytes == Offset);
+ return false;
+}
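+
+// Sketch (hypothetical i32 loads): returns true when LD reads the 4 bytes
+// starting exactly Dist * Bytes after Base, here Base + 4.
+//
+//   bool Consecutive = DAG.areNonVolatileConsecutiveLoads(LD, Base,
+//                                                         /*Bytes=*/4,
+//                                                         /*Dist=*/1);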
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+ // If this is a GlobalAddress + cst, return the alignment.
+ const GlobalValue *GV;
+ int64_t GVOffset = 0;
+ if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+ unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType());
+ KnownBits Known(IdxWidth);
+ llvm::computeKnownBits(GV, Known, getDataLayout());
+ unsigned AlignBits = Known.countMinTrailingZeros();
+ unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
+ if (Align)
+ return MinAlign(Align, GVOffset);
+ }
+
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+ int FrameIdx = INT_MIN;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != INT_MIN) {
+ const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ return FIInfoAlign;
+ }
+
+ return 0;
+}
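+
+// Sketch (hypothetical Ptr): query the alignment the DAG can prove for an
+// address; a result of 0 means the alignment is unknown.
+//
+//   unsigned KnownAlign = DAG.InferPtrAlignment(Ptr);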
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split (or expanded) into two not necessarily identical pieces.
+std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
+ // Currently all types are split in half.
+ EVT LoVT, HiVT;
+ if (!VT.isVector())
+ LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT);
+ else
+ LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext());
+
+ return std::make_pair(LoVT, HiVT);
+}
+
+/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
+/// low/high part.
+std::pair<SDValue, SDValue>
+SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
+ const EVT &HiVT) {
+ assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <=
+ N.getValueType().getVectorNumElements() &&
+ "More vector elements requested than available!");
+ SDValue Lo, Hi;
+ Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
+ getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
+ Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
+ getConstant(LoVT.getVectorNumElements(), DL,
+ TLI->getVectorIdxTy(getDataLayout())));
+ return std::make_pair(Lo, Hi);
+}
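+
+// Sketch (hypothetical v8i16 value N): split it into two v4i16 halves using
+// the types computed by GetSplitDestVTs.
+//
+//   EVT LoVT, HiVT;
+//   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N.getValueType());
+//   SDValue Lo, Hi;
+//   std::tie(Lo, Hi) = DAG.SplitVector(N, DL, LoVT, HiVT);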
+
+/// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
+SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
+ EVT VT = N.getValueType();
+ EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
+ NextPowerOf2(VT.getVectorNumElements()));
+ return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N,
+ getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
+}
+
+void SelectionDAG::ExtractVectorElements(SDValue Op,
+ SmallVectorImpl<SDValue> &Args,
+ unsigned Start, unsigned Count) {
+ EVT VT = Op.getValueType();
+ if (Count == 0)
+ Count = VT.getVectorNumElements();
+
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxTy = TLI->getVectorIdxTy(getDataLayout());
+ SDLoc SL(Op);
+ for (unsigned i = Start, e = Start + Count; i != e; ++i) {
+ Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Op, getConstant(i, SL, IdxTy)));
+ }
+}
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+ return getGlobal()->getType()->getAddressSpace();
+}
+
+Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
+ unsigned &SplatBitSize,
+ bool &HasAnyUndefs,
+ unsigned MinSplatBits,
+ bool IsBigEndian) const {
+ EVT VT = getValueType(0);
+ assert(VT.isVector() && "Expected a vector type");
+ unsigned VecWidth = VT.getSizeInBits();
+ if (MinSplatBits > VecWidth)
+ return false;
+
+ // FIXME: The widths are based on this node's type, but build vectors can
+ // truncate their operands.
+ SplatValue = APInt(VecWidth, 0);
+ SplatUndef = APInt(VecWidth, 0);
+
+ // Get the bits. Bits with undefined values (when the corresponding element
+ // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+ // in SplatValue. If any of the values are not constant, give up and return
+ // false.
+ unsigned int NumOps = getNumOperands();
+ assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltWidth = VT.getScalarSizeInBits();
+
+ for (unsigned j = 0; j < NumOps; ++j) {
+ unsigned i = IsBigEndian ? NumOps - 1 - j : j;
+ SDValue OpVal = getOperand(i);
+ unsigned BitPos = j * EltWidth;
+
+ if (OpVal.isUndef())
+ SplatUndef.setBits(BitPos, BitPos + EltWidth);
+ else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+ else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
+ else
+ return false;
+ }
+
+ // The build_vector is all constants or undefs. Find the smallest element
+ // size that splats the vector.
+ HasAnyUndefs = (SplatUndef != 0);
+
+ // FIXME: This does not work for vectors with elements less than 8 bits.
+ while (VecWidth > 8) {
+ unsigned HalfSize = VecWidth / 2;
+ APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatValue.trunc(HalfSize);
+ APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = SplatUndef.trunc(HalfSize);
+
+ // If the two halves do not match (ignoring undef bits), stop here.
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+ MinSplatBits > HalfSize)
+ break;
+
+ SplatValue = HighValue | LowValue;
+ SplatUndef = HighUndef & LowUndef;
+
+ VecWidth = HalfSize;
+ }
+
+ SplatBitSize = VecWidth;
+ return true;
+}
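+
+// Worked sketch: for <4 x i8> <1, 1, 1, 1> the bits are 0x01010101. The
+// halving loop matches 0x0101 against 0x0101, then 0x01 against 0x01, and
+// stops at width 8, so SplatBitSize is reported as 8 (hypothetical use):
+//
+//   APInt SplatValue, SplatUndef;
+//   unsigned SplatBits;
+//   bool HasUndefs;
+//   BV->isConstantSplat(SplatValue, SplatUndef, SplatBits, HasUndefs);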
+
+SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
+ if (UndefElements) {
+ UndefElements->clear();
+ UndefElements->resize(getNumOperands());
+ }
+ assert(getNumOperands() == DemandedElts.getBitWidth() &&
+ "Unexpected vector size");
+ if (!DemandedElts)
+ return SDValue();
+ SDValue Splatted;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ SDValue Op = getOperand(i);
+ if (Op.isUndef()) {
+ if (UndefElements)
+ (*UndefElements)[i] = true;
+ } else if (!Splatted) {
+ Splatted = Op;
+ } else if (Splatted != Op) {
+ return SDValue();
+ }
+ }
+
+ if (!Splatted) {
+ unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros();
+ assert(getOperand(FirstDemandedIdx).isUndef() &&
+ "Can only have a splat without a constant for all undefs.");
+ return getOperand(FirstDemandedIdx);
+ }
+
+ return Splatted;
+}
+
+SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+ APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ return getSplatValue(DemandedElts, UndefElements);
+}
+
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantSDNode>(
+ getSplatValue(DemandedElts, UndefElements));
+}
+
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
+}
+
+ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantFPSDNode>(
+ getSplatValue(DemandedElts, UndefElements));
+}
+
+ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
+}
+
+int32_t
+BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
+ uint32_t BitWidth) const {
+ if (ConstantFPSDNode *CN =
+ dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
+ bool IsExact;
+ APSInt IntVal(BitWidth);
+ const APFloat &APF = CN->getValueAPF();
+ if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
+ APFloat::opOK ||
+ !IsExact)
+ return -1;
+
+ return IntVal.exactLogBase2();
+ }
+ return -1;
+}
+
+bool BuildVectorSDNode::isConstant() const {
+ for (const SDValue &Op : op_values()) {
+ unsigned Opc = Op.getOpcode();
+ if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP)
+ return false;
+ }
+ return true;
+}
+
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i, e;
+ for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+ /* search */;
+
+ // If all elements are undefined, this shuffle can be considered a splat
+ // (although it should eventually get simplified away completely).
+ if (i == e)
+ return true;
+
+ // Make sure all remaining elements are either undef or the same as the first
+ // non-undef value.
+ for (int Idx = Mask[i]; i != e; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Idx)
+ return false;
+ return true;
+}
+
+// Returns the SDNode if it is a constant integer BuildVector
+// or constant integer.
+SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
+ if (isa<ConstantSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
+ return N.getNode();
+ // Treat a GlobalAddress supporting constant offset folding as a
+ // constant integer.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N))
+ if (GA->getOpcode() == ISD::GlobalAddress &&
+ TLI->isOffsetFoldingLegal(GA))
+ return GA;
+ return nullptr;
+}
+
+SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
+ if (isa<ConstantFPSDNode>(N))
+ return N.getNode();
+
+ if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+ return N.getNode();
+
+ return nullptr;
+}
+
+void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
+ assert(!Node->OperandList && "Node already has operands");
+ assert(SDNode::getMaxNumOperands() >= Vals.size() &&
+ "too many operands to fit into SDNode");
+ SDUse *Ops = OperandRecycler.allocate(
+ ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+
+ bool IsDivergent = false;
+ for (unsigned I = 0; I != Vals.size(); ++I) {
+ Ops[I].setUser(Node);
+ Ops[I].setInitial(Vals[I]);
+    // Skip chains; they do not carry divergence.
+    if (Ops[I].Val.getValueType() != MVT::Other)
+ IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent();
+ }
+ Node->NumOperands = Vals.size();
+ Node->OperandList = Ops;
+ IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
+ if (!TLI->isSDNodeAlwaysUniform(Node))
+ Node->SDNodeBits.IsDivergent = IsDivergent;
+ checkForCycles(Node);
+}
+
+SDValue SelectionDAG::getTokenFactor(const SDLoc &DL,
+ SmallVectorImpl<SDValue> &Vals) {
+ size_t Limit = SDNode::getMaxNumOperands();
+ while (Vals.size() > Limit) {
+ unsigned SliceIdx = Vals.size() - Limit;
+ auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit);
+ SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs);
+ Vals.erase(Vals.begin() + SliceIdx, Vals.end());
+ Vals.emplace_back(NewTF);
+ }
+ return getNode(ISD::TokenFactor, DL, MVT::Other, Vals);
+}
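+
+// Sketch (hypothetical Chains vector): merge an arbitrary number of chain
+// values; the helper chunks the list so no single TokenFactor exceeds
+// SDNode::getMaxNumOperands().
+//
+//   SDValue Merged = DAG.getTokenFactor(DL, Chains);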
+
+#ifndef NDEBUG
+static void checkForCyclesHelper(const SDNode *N,
+ SmallPtrSetImpl<const SDNode*> &Visited,
+ SmallPtrSetImpl<const SDNode*> &Checked,
+ const llvm::SelectionDAG *DAG) {
+ // If this node has already been checked, don't check it again.
+ if (Checked.count(N))
+ return;
+
+ // If a node has already been visited on this depth-first walk, reject it as
+ // a cycle.
+ if (!Visited.insert(N).second) {
+ errs() << "Detected cycle in SelectionDAG\n";
+ dbgs() << "Offending node:\n";
+ N->dumprFull(DAG); dbgs() << "\n";
+ abort();
+ }
+
+ for (const SDValue &Op : N->op_values())
+ checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG);
+
+ Checked.insert(N);
+ Visited.erase(N);
+}
+#endif
+
+void llvm::checkForCycles(const llvm::SDNode *N,
+ const llvm::SelectionDAG *DAG,
+ bool force) {
+#ifndef NDEBUG
+ bool check = force;
+#ifdef EXPENSIVE_CHECKS
+ check = true;
+#endif // EXPENSIVE_CHECKS
+ if (check) {
+ assert(N && "Checking nonexistent SDNode");
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked, DAG);
+ }
+#endif // !NDEBUG
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
+ checkForCycles(DAG->getRoot().getNode(), DAG, force);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
new file mode 100644
index 0000000000000..3a53ab9717a45
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -0,0 +1,298 @@
+//==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include <cstdint>
+
+using namespace llvm;
+
+bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
+ const SelectionDAG &DAG,
+ int64_t &Off) const {
+  // Conservatively fail if either match failed.
+ if (!Base.getNode() || !Other.Base.getNode())
+ return false;
+ if (!hasValidOffset() || !Other.hasValidOffset())
+ return false;
+ // Initial Offset difference.
+ Off = *Other.Offset - *Offset;
+
+ if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
+ // Trivial match.
+ if (Other.Base == Base)
+ return true;
+
+ // Match GlobalAddresses
+ if (auto *A = dyn_cast<GlobalAddressSDNode>(Base))
+ if (auto *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
+ if (A->getGlobal() == B->getGlobal()) {
+ Off += B->getOffset() - A->getOffset();
+ return true;
+ }
+
+ // Match Constants
+ if (auto *A = dyn_cast<ConstantPoolSDNode>(Base))
+ if (auto *B = dyn_cast<ConstantPoolSDNode>(Other.Base)) {
+ bool IsMatch =
+ A->isMachineConstantPoolEntry() == B->isMachineConstantPoolEntry();
+ if (IsMatch) {
+ if (A->isMachineConstantPoolEntry())
+ IsMatch = A->getMachineCPVal() == B->getMachineCPVal();
+ else
+ IsMatch = A->getConstVal() == B->getConstVal();
+ }
+ if (IsMatch) {
+ Off += B->getOffset() - A->getOffset();
+ return true;
+ }
+ }
+
+ const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ // Match FrameIndexes.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base)) {
+ // Equal FrameIndexes - offsets are directly comparable.
+ if (A->getIndex() == B->getIndex())
+ return true;
+ // Non-equal FrameIndexes - If both frame indices are fixed
+ // we know their relative offsets and can compare them. Otherwise
+ // we must be conservative.
+ if (MFI.isFixedObjectIndex(A->getIndex()) &&
+ MFI.isFixedObjectIndex(B->getIndex())) {
+ Off += MFI.getObjectOffset(B->getIndex()) -
+ MFI.getObjectOffset(A->getIndex());
+ return true;
+ }
+ }
+ }
+ return false;
+}
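+
+// Sketch (hypothetical decompositions BIO0 and BIO1): when the bases match,
+// Off reports how many bytes BIO1 lies past BIO0.
+//
+//   int64_t Off;
+//   if (BIO0.equalBaseIndex(BIO1, DAG, Off)) {
+//     // BIO1's address == BIO0's address + Off bytes.
+//   }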
+
+bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
+ const Optional<int64_t> NumBytes0,
+ const SDNode *Op1,
+ const Optional<int64_t> NumBytes1,
+ const SelectionDAG &DAG, bool &IsAlias) {
+
+ BaseIndexOffset BasePtr0 = match(Op0, DAG);
+ BaseIndexOffset BasePtr1 = match(Op1, DAG);
+
+ if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()))
+ return false;
+ int64_t PtrDiff;
+ if (NumBytes0.hasValue() && NumBytes1.hasValue() &&
+ BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
+ // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
+ // following situations arise:
+ IsAlias = !(
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // ========PtrDiff========>
+ (*NumBytes0 <= PtrDiff) ||
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // =====(-PtrDiff)====>
+      (PtrDiff + *NumBytes1 <= 0)); // i.e. *NumBytes1 <= -PtrDiff.
+ return true;
+ }
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+      // If the bases are the same frame index but we couldn't find a
+      // constant offset (the indices differ), be conservative.
+ if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+ !MFI.isFixedObjectIndex(B->getIndex()))) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+ bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+ bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+ bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+ bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+ bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+
+  // If both bases are known objects and either the base kinds differ or the
+  // indices are comparable, we can prove that they do not alias.
+ if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+ (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+ (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+ IsAlias = false;
+ return true;
+ }
+ return false; // Cannot determine whether the pointers alias.
+}
+
+bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
+ const BaseIndexOffset &Other,
+ int64_t OtherBitSize, int64_t &BitOffset) const {
+ int64_t Offset;
+ if (!equalBaseIndex(Other, DAG, Offset))
+ return false;
+ if (Offset >= 0) {
+ // Other is after *this:
+ // [-------*this---------]
+ // [---Other--]
+ // ==Offset==>
+ BitOffset = 8 * Offset;
+ return BitOffset + OtherBitSize <= BitSize;
+ }
+  // Other starts strictly before *this; it cannot be fully contained.
+ // [-------*this---------]
+ // [--Other--]
+ return false;
+}
+
+/// Parses tree in Ptr for base, index, offset addresses.
+static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
+ const SelectionDAG &DAG) {
+ SDValue Ptr = N->getBasePtr();
+
+ // (((B + I*M) + c)) + c ...
+ SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
+ SDValue Index = SDValue();
+ int64_t Offset = 0;
+ bool IsIndexSignExt = false;
+
+ // pre-inc/pre-dec ops are components of EA.
+ if (N->getAddressingMode() == ISD::PRE_INC) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+ Offset += C->getSExtValue();
+ else // If unknown, give up now.
+ return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+ } else if (N->getAddressingMode() == ISD::PRE_DEC) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+ Offset -= C->getSExtValue();
+ else // If unknown, give up now.
+ return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+ }
+
+ // Consume constant adds & ors with appropriate masking.
+ while (true) {
+ switch (Base->getOpcode()) {
+ case ISD::OR:
+ // Only consider ORs which act as adds.
+ if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1)))
+ if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) {
+ Offset += C->getSExtValue();
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
+ continue;
+ }
+ break;
+ case ISD::ADD:
+ if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
+ Offset += C->getSExtValue();
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
+ continue;
+ }
+ break;
+ case ISD::LOAD:
+ case ISD::STORE: {
+ auto *LSBase = cast<LSBaseSDNode>(Base.getNode());
+ unsigned int IndexResNo = (Base->getOpcode() == ISD::LOAD) ? 1 : 0;
+ if (LSBase->isIndexed() && Base.getResNo() == IndexResNo)
+ if (auto *C = dyn_cast<ConstantSDNode>(LSBase->getOffset())) {
+ auto Off = C->getSExtValue();
+ if (LSBase->getAddressingMode() == ISD::PRE_DEC ||
+ LSBase->getAddressingMode() == ISD::POST_DEC)
+ Offset -= Off;
+ else
+ Offset += Off;
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(LSBase->getBasePtr());
+ continue;
+ }
+ break;
+ }
+ }
+    // If we get here, break out of the loop.
+ break;
+ }
+
+ if (Base->getOpcode() == ISD::ADD) {
+ // TODO: The following code appears to be needless as it just
+ // bails on some Ptrs early, reducing the cases where we
+ // find equivalence. We should be able to remove this.
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Base is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Base->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+
+ // Look at Base + Index + Offset cases.
+ Index = Base->getOperand(1);
+ SDValue PotentialBase = Base->getOperand(0);
+
+ // Skip signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+    // Check for an (Index + Offset) pattern.
+ if (Index->getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Index->getOperand(1)))
+ return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt);
+
+ Offset += cast<ConstantSDNode>(Index->getOperand(1))->getSExtValue();
+ Index = Index->getOperand(0);
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else
+ IsIndexSignExt = false;
+ Base = PotentialBase;
+ }
+ return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+}
+
+BaseIndexOffset BaseIndexOffset::match(const SDNode *N,
+ const SelectionDAG &DAG) {
+ if (const auto *LS0 = dyn_cast<LSBaseSDNode>(N))
+ return matchLSNode(LS0, DAG);
+ if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) {
+ if (LN->hasOffset())
+ return BaseIndexOffset(LN->getOperand(1), SDValue(), LN->getOffset(),
+ false);
+ return BaseIndexOffset(LN->getOperand(1), SDValue(), false);
+ }
+ return BaseIndexOffset();
+}
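+
+// Sketch (hypothetical StoreNode): decompose a memory operation's address
+// into base + index + offset; an empty base signals a failed match.
+//
+//   BaseIndexOffset BIO = BaseIndexOffset::match(StoreNode, DAG);
+//   if (BIO.getBase().getNode()) {
+//     // Matched: inspect BIO.getIndex() and the offset.
+//   }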
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+LLVM_DUMP_METHOD void BaseIndexOffset::dump() const {
+ print(dbgs());
+}
+
+void BaseIndexOffset::print(raw_ostream& OS) const {
+ OS << "BaseIndexOffset base=[";
+ Base->print(OS);
+ OS << "] index=[";
+ if (Index)
+ Index->print(OS);
+ OS << "] offset=" << Offset;
+}
+
+#endif
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 0000000000000..8c15563fcd23d
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,10559 @@
+//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace PatternMatch;
+using namespace SwitchCG;
+
+#define DEBUG_TYPE "isel"
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+ LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision), cl::Hidden,
+ cl::init(0));
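+// Usage sketch (illustrative, not from this patch): passing
+// -limit-float-precision=12 to llc selects the 12-bit-accurate inline
+// expansions where a target provides them; the default of 0 leaves the
+// full-precision libcalls in place.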
+
+static cl::opt<unsigned> SwitchPeelThreshold(
+ "switch-peel-threshold", cl::Hidden, cl::init(66),
+ cl::desc("Set the case probability threshold for peeling the case from a "
+ "switch statement. A value greater than 100 will void this "
+ "optimization"));
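+// Illustrative example: with the default threshold of 66, a switch in which
+// one case carries, say, a 70% branch probability has that case compared and
+// branched to first ("peeled") before the remaining cases are lowered.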
+
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static const unsigned MaxParallelChains = 64;
+
+// Return the calling convention if the Value passed requires ABI mangling,
+// i.e. it is a parameter to a function or a return value from a function
+// which is not an intrinsic.
+static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) {
+ if (auto *R = dyn_cast<ReturnInst>(V))
+ return R->getParent()->getParent()->getCallingConv();
+
+ if (auto *CI = dyn_cast<CallInst>(V)) {
+ const bool IsInlineAsm = CI->isInlineAsm();
+ const bool IsIndirectFunctionCall =
+ !IsInlineAsm && !CI->getCalledFunction();
+
+ // It is possible that the call instruction is an inline asm statement or an
+ // indirect function call in which case the return value of
+ // getCalledFunction() would be nullptr.
+    const bool IsIntrinsicCall =
+        !IsInlineAsm && !IsIndirectFunctionCall &&
+        CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic;
+
+    if (!IsInlineAsm && !IsIntrinsicCall)
+ return CI->getCallingConv();
+ }
+
+ return None;
+}
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V,
+ Optional<CallingConv::ID> CC);
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
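+///
+/// For example (illustrative), on a target whose widest legal integer type is
+/// i32, an i64 value arrives as two i32 parts that are reassembled with a
+/// BUILD_PAIR, while an i16 value arrives as a single i32 part that is
+/// truncated, with AssertZext/AssertSext recording what is known about the
+/// discarded high bits.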
+static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V,
+ Optional<CallingConv::ID> CC = None,
+ Optional<ISD::NodeType> AssertOp = None) {
+ if (ValueVT.isVector())
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
+ CC);
+
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ if (NumParts > 1) {
+ // Assemble the value from multiple parts.
+ if (ValueVT.isInteger()) {
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned ValueBits = ValueVT.getSizeInBits();
+
+ // Assemble the power of 2 part.
+ unsigned RoundParts =
+ (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
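+      // For example (illustrative), NumParts == 3 gives RoundParts == 2: the
+      // first two parts are assembled as a power-of-2 chunk here, and the
+      // trailing odd part is shifted into place further below.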
+ unsigned RoundBits = PartBits * RoundParts;
+ EVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+ SDValue Lo, Hi;
+
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+ if (RoundParts > 2) {
+ Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
+ PartVT, HalfVT, V);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
+ RoundParts / 2, PartVT, HalfVT, V);
+ } else {
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
+ }
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
+
+ if (RoundParts < NumParts) {
+ // Assemble the trailing non-power-of-2 part.
+ unsigned OddParts = NumParts - RoundParts;
+ EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
+ OddVT, V, CC);
+
+ // Combine the round and odd parts.
+ Lo = Val;
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+ Hi =
+ DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueSizeInBits(), DL,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
+ }
+ } else if (PartVT.isFloatingPoint()) {
+ // FP split into multiple FP parts (for ppcf128)
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
+ "Unexpected split");
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
+ } else {
+ // FP split into integer parts (soft fp)
+ assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+ !PartVT.isVector() && "Unexpected split");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
+ }
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ // PartEVT is the type of the register class that holds the value.
+ // ValueVT is the type of the inline asm operation.
+ EVT PartEVT = Val.getValueType();
+
+ if (PartEVT == ValueVT)
+ return Val;
+
+ if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
+ ValueVT.bitsLT(PartEVT)) {
+ // For an FP value in an integer part, we need to truncate to the right
+ // width first.
+ PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
+ }
+
+ // Handle types that have the same size.
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle types with different sizes.
+ if (PartEVT.isInteger() && ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartEVT)) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+ // zero or sign-extension.
+ if (AssertOp.hasValue())
+ Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ // FP_ROUND's are always exact here.
+ if (ValueVT.bitsLT(Val.getValueType()))
+ return DAG.getNode(
+ ISD::FP_ROUND, DL, ValueVT, Val,
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
+
+ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
+ }
+
+ // Handle MMX to a narrower integer type by bitcasting MMX to integer and
+ // then truncating.
+ if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
+ ValueVT.bitsLT(PartEVT)) {
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ report_fatal_error("Unknown mismatch in getCopyFromParts!");
+}
+
+static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
+ const Twine &ErrMsg) {
+ const Instruction *I = dyn_cast_or_null<Instruction>(V);
+  if (!I)
+ return Ctx.emitError(ErrMsg);
+
+ const char *AsmError = ", possible invalid constraint for vector type";
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ return Ctx.emitError(I, ErrMsg + AsmError);
+
+ return Ctx.emitError(I, ErrMsg);
+}
+
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent. If the parts combine to a
+/// type larger than ValueVT then AssertOp can be used to specify whether the
+/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
+/// ValueVT (ISD::AssertSext).
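+///
+/// For example (illustrative), a v4f32 value that the ABI split into two v2f32
+/// registers is reassembled here with a CONCAT_VECTORS of the two parts.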
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V,
+ Optional<CallingConv::ID> CallConv) {
+ assert(ValueVT.isVector() && "Not a vector value");
+ assert(NumParts > 0 && "No parts to assemble!");
+ const bool IsABIRegCopy = CallConv.hasValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ // Handle a multi-element vector.
+ if (NumParts > 1) {
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs;
+
+ if (IsABIRegCopy) {
+ NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+ *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ } else {
+ NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ }
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT.getSizeInBits() ==
+ Parts[0].getSimpleValueType().getSizeInBits() &&
+ "Part type sizes don't match!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+ PartVT, IntermediateVT, V);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT, V);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ EVT BuiltVectorTy =
+ EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
+ (IntermediateVT.isVector()
+ ? IntermediateVT.getVectorNumElements() * NumParts
+ : NumIntermediates));
+ Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, BuiltVectorTy, Ops);
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ EVT PartEVT = Val.getValueType();
+
+ if (PartEVT == ValueVT)
+ return Val;
+
+ if (PartEVT.isVector()) {
+ // If the element type of the source/dest vectors are the same, but the
+ // parts vector has more elements than the value vector, then we have a
+ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+ // elements we want.
+ if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ "Cannot narrow, it would be a lossy transformation");
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ "Cannot handle this kind of promotion");
+ // Promoted vector extract
+ return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
+  }
+
+ // Trivial bitcast if the types are the same size and the destination
+ // vector type is legal.
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ TLI.isTypeLegal(ValueVT))
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ if (ValueVT.getVectorNumElements() != 1) {
+    // Certain ABIs require that vectors are passed as integers. If the vector
+    // is the same size as the parts, this is an obvious bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
+      // Bitcast Val back to the original type and extract the corresponding
+ // vector we want.
+ unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
+ EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
+ ValueVT.getVectorElementType(), Elts);
+ Val = DAG.getBitcast(WiderVecType, Val);
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ diagnosePossiblyInvalidConstraint(
+ *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
+ return DAG.getUNDEF(ValueVT);
+ }
+
+ // Handle cases such as i8 -> <1 x i1>
+ EVT ValueSVT = ValueVT.getVectorElementType();
+ if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
+ Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
+ : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
+
+ return DAG.getBuildVector(ValueVT, DL, Val);
+}
+
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V,
+ Optional<CallingConv::ID> CallConv);
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts. If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
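+///
+/// For example (illustrative), an i1 value copied into a single i8 part with
+/// ExtendKind == ISD::ZERO_EXTEND becomes a zero-extended 0 or 1 in the part,
+/// while an i64 value on a 32-bit target is split into two i32 parts.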
+static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+ SDValue *Parts, unsigned NumParts, MVT PartVT,
+ const Value *V,
+ Optional<CallingConv::ID> CallConv = None,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ EVT ValueVT = Val.getValueType();
+
+ // Handle the vector case separately.
+ if (ValueVT.isVector())
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
+ CallConv);
+
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned OrigNumParts = NumParts;
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
+ "Copying to an illegal type!");
+
+ if (NumParts == 0)
+ return;
+
+ assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
+
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else {
+ if (ValueVT.isFloatingPoint()) {
+ // FP values need to be bitcast, then extended if they are being put
+ // into a larger container.
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ }
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartEVT != ValueVT);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+    // If the parts cover fewer bits than the value has, truncate the value.
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
+ if (NumParts == 1) {
+ if (PartEVT != ValueVT) {
+ diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+ "scalar-to-vector conversion failed");
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+ DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
+
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
+ CallConv);
+
+ if (DAG.getDataLayout().isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
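+  // For example (illustrative), expanding an i128 into four i32 parts first
+  // extracts two i64 halves, then splits each half into two i32 parts on the
+  // next pass of the loop.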
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
+ }
+ }
+ }
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+}
+
+static SDValue widenVectorToPartType(SelectionDAG &DAG,
+ SDValue Val, const SDLoc &DL, EVT PartVT) {
+ if (!PartVT.isVector())
+ return SDValue();
+
+ EVT ValueVT = Val.getValueType();
+ unsigned PartNumElts = PartVT.getVectorNumElements();
+ unsigned ValueNumElts = ValueVT.getVectorNumElements();
+ if (PartNumElts > ValueNumElts &&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ DAG.ExtractVectorElements(Val, Ops);
+ SDValue EltUndef = DAG.getUNDEF(ElementVT);
+ for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i)
+ Ops.push_back(EltUndef);
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+ return DAG.getBuildVector(PartVT, DL, Ops);
+ }
+
+ return SDValue();
+}
+
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V,
+ Optional<CallingConv::ID> CallConv) {
+ EVT ValueVT = Val.getValueType();
+ assert(ValueVT.isVector() && "Not a vector");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const bool IsABIRegCopy = CallConv.hasValue();
+
+ if (NumParts == 1) {
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
+ Val = Widened;
+ } else if (PartVT.isVector() &&
+ PartEVT.getVectorElementType().bitsGE(
+ ValueVT.getVectorElementType()) &&
+ PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+ // Promoted vector extract
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ } else {
+ if (ValueVT.getVectorNumElements() == 1) {
+ Val = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ } else {
+ assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
+ "lossy conversion of vector to scalar type");
+ EVT IntermediateType =
+ EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getBitcast(IntermediateType, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ }
+ }
+
+ assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs;
+ if (IsABIRegCopy) {
+ NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+ *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ } else {
+ NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ }
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ unsigned IntermediateNumElts = IntermediateVT.isVector() ?
+ IntermediateVT.getVectorNumElements() : 1;
+
+  // Convert the vector to the appropriate type if necessary.
+ unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
+
+ EVT BuiltVectorTy = EVT::getVectorVT(
+ *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+ MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ if (ValueVT != BuiltVectorTy) {
+ if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
+ Val = Widened;
+
+ Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+ }
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i) {
+ if (IntermediateVT.isVector()) {
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
+ DAG.getConstant(i * IntermediateNumElts, DL, IdxVT));
+ } else {
+ Ops[i] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
+ DAG.getConstant(i, DL, IdxVT));
+ }
+ }
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
+ } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each intermediate operand
+    // into legal parts.
+ assert(NumIntermediates != 0 && "division by zero");
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
+ CallConv);
+ }
+}
+
+RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
+ EVT valuevt, Optional<CallingConv::ID> CC)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
+ RegCount(1, regs.size()), CallConv(CC) {}
+
+RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL, unsigned Reg, Type *Ty,
+ Optional<CallingConv::ID> CC) {
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs);
+
+ CallConv = CC;
+
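+  // For example (illustrative), an i64 value on a 32-bit target with i32
+  // registers gives NumRegs == 2 below: Regs receives {Reg, Reg + 1}, and a
+  // single i32 entry is recorded in RegVTs for that value.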
+ for (EVT ValueVT : ValueVTs) {
+ unsigned NumRegs =
+ isABIMangled()
+ ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
+ : TLI.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT =
+ isABIMangled()
+ ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
+ : TLI.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ RegCount.push_back(NumRegs);
+ Reg += NumRegs;
+ }
+}
+
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+ FunctionLoweringInfo &FuncInfo,
+ const SDLoc &dl, SDValue &Chain,
+ SDValue *Flag, const Value *V) const {
+ // A Value with type {} or [0 x %t] needs no registers.
+ if (ValueVTs.empty())
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = RegCount[Value];
+ MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(),
+ CallConv.getValue(), RegVTs[Value])
+ : RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (!Flag) {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ } else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+
+ Chain = P.getValue(1);
+ Parts[i] = P;
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (!Register::isVirtualRegister(Regs[Part + i]) ||
+ !RegisterVT.isInteger())
+ continue;
+
+ const FunctionLoweringInfo::LiveOutInfo *LOI =
+ FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+ if (!LOI)
+ continue;
+
+ unsigned RegSize = RegisterVT.getScalarSizeInBits();
+ unsigned NumSignBits = LOI->NumSignBits;
+ unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
+
+ if (NumZeroBits == RegSize) {
+ // The current value is a zero.
+ // Explicitly express that as it would be easier for
+ // optimizations to kick in.
+ Parts[i] = DAG.getConstant(0, dl, RegisterVT);
+ continue;
+ }
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
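+      // For example (illustrative), a 32-bit register known to have 24
+      // leading zero bits gets AssertZext i8; one known to have 9 sign bits
+      // gets AssertSext i24.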
+ bool isSExt;
+ EVT FromVT(MVT::Other);
+ if (NumZeroBits) {
+ FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
+ isSExt = false;
+ } else if (NumSignBits > 1) {
+ FromVT =
+ EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
+ isSExt = true;
+ } else {
+ continue;
+ }
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
+ RegisterVT, ValueVT, V, CallConv);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
+}
+
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
+ const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+ const Value *V,
+ ISD::NodeType PreferredExtendType) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ISD::NodeType ExtendKind = PreferredExtendType;
+
+  // Get the list of the value's legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumParts = RegCount[Value];
+
+ MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(),
+ CallConv.getValue(), RegVTs[Value])
+ : RegVTs[Value];
+
+ if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
+ NumParts, RegisterVT, V, CallConv, ExtendKind);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (!Flag) {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ } else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+ // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+    // flagged to it. That is, the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+}
+
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx, const SDLoc &dl,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ if (HasMatching)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
+ // Put the register class of the virtual registers in the flag word. That
+ // way, later passes can recompute register class constraints for inline
+ // assembly as well as normal instructions.
+ // Don't do this for tied operands that can use the regclass information
+ // from the def.
+ const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
+
+ SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
+ Ops.push_back(Res);
+
+ if (Code == InlineAsm::Kind_Clobber) {
+ // Clobbers should always have a 1:1 mapping with registers, and may
+ // reference registers that have illegal (e.g. vector) types. Hence, we
+ // shouldn't try to apply any sort of splitting logic to them.
+ assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
+ "No 1:1 mapping from clobbers to regs?");
+ unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ (void)SP;
+ for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
+ Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
+ assert(
+ (Regs[I] != SP ||
+ DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
+ "If we clobbered the stack pointer, MFI should know about it.");
+ }
+ return;
+ }
+
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+ MVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ unsigned TheReg = Regs[Reg++];
+ Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
+ }
+ }
+}
+
+SmallVector<std::pair<unsigned, unsigned>, 4>
+RegsForValue::getRegsAndSizes() const {
+ SmallVector<std::pair<unsigned, unsigned>, 4> OutVec;
+ unsigned I = 0;
+ for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
+ unsigned RegCount = std::get<0>(CountAndVT);
+ MVT RegisterVT = std::get<1>(CountAndVT);
+ unsigned RegisterSize = RegisterVT.getSizeInBits();
+ for (unsigned E = I + RegCount; I != E; ++I)
+ OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
+ }
+ return OutVec;
+}
+
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
+ const TargetLibraryInfo *li) {
+ AA = aa;
+ GFI = gfi;
+ LibInfo = li;
+ DL = &DAG.getDataLayout();
+ Context = DAG.getContext();
+ LPadToCallSiteMap.clear();
+ SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
+}
+
+void SelectionDAGBuilder::clear() {
+ NodeMap.clear();
+ UnusedArgNodeMap.clear();
+ PendingLoads.clear();
+ PendingExports.clear();
+ CurInst = nullptr;
+ HasTailCall = false;
+ SDNodeOrder = LowestSDNodeOrder;
+ StatepointLowering.clear();
+}
+
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+ DanglingDebugInfoMap.clear();
+}
+
+SDValue SelectionDAGBuilder::getRoot() {
+ if (PendingLoads.empty())
+ return DAG.getRoot();
+
+ if (PendingLoads.size() == 1) {
+ SDValue Root = PendingLoads[0];
+ DAG.setRoot(Root);
+ PendingLoads.clear();
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
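+  // A TokenFactor takes the pending load chains as operands and yields a
+  // single chain, so (illustratively) three independent loads become
+  //   Root = TokenFactor(Ld0.ch, Ld1.ch, Ld2.ch)
+  // without imposing any ordering among the loads themselves.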
+ SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads);
+ PendingLoads.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+SDValue SelectionDAGBuilder::getControlRoot() {
+ SDValue Root = DAG.getRoot();
+
+ if (PendingExports.empty())
+ return Root;
+
+ // Turn all of the CopyToReg chains into one factored node.
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = PendingExports.size();
+ for (; i != e; ++i) {
+ assert(PendingExports[i].getNode()->getNumOperands() > 1);
+ if (PendingExports[i].getNode()->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ PendingExports.push_back(Root);
+ }
+
+ Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ PendingExports);
+ PendingExports.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (I.isTerminator()) {
+ HandlePHINodesInSuccessorBlocks(I.getParent());
+ }
+
+ // Increase the SDNodeOrder if dealing with a non-debug instruction.
+ if (!isa<DbgInfoIntrinsic>(I))
+ ++SDNodeOrder;
+
+ CurInst = &I;
+
+ visit(I.getOpcode(), I);
+
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
+ // Propagate the fast-math-flags of this IR instruction to the DAG node that
+ // maps to this instruction.
+ // TODO: We could handle all flags (nsw, etc) here.
+ // TODO: If an IR instruction maps to >1 node, only the final node will have
+ // flags set.
+ if (SDNode *Node = getNodeForIRValue(&I)) {
+ SDNodeFlags IncomingFlags;
+ IncomingFlags.copyFMF(*FPMO);
+ if (!Node->getFlags().isDefined())
+ Node->setFlags(IncomingFlags);
+ else
+ Node->intersectFlagsWith(IncomingFlags);
+ }
+ }
+
+ if (!I.isTerminator() && !HasTailCall &&
+ !isStatepoint(&I)) // statepoints handle their exports internally
+ CopyToExportRegsIfNeeded(&I);
+
+ CurInst = nullptr;
+}
+
+void SelectionDAGBuilder::visitPHI(const PHINode &) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+ // ConstantExpr's in addition to instructions.
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown instruction type encountered!");
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
+#include "llvm/IR/Instruction.def"
+ }
+}
+
+void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
+ const DIExpression *Expr) {
+ auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
+ const DbgValueInst *DI = DDI.getDI();
+ DIVariable *DanglingVariable = DI->getVariable();
+ DIExpression *DanglingExpr = DI->getExpression();
+ if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
+ LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
+ return true;
+ }
+ return false;
+ };
+
+ for (auto &DDIMI : DanglingDebugInfoMap) {
+ DanglingDebugInfoVector &DDIV = DDIMI.second;
+
+ // If debug info is to be dropped, run it through final checks to see
+ // whether it can be salvaged.
+ for (auto &DDI : DDIV)
+ if (isMatchingDbgValue(DDI))
+ salvageUnresolvedDbgValue(DDI);
+
+ DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
+ }
+}
+
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+ SDValue Val) {
+ auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
+ if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
+ return;
+
+ DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
+ for (auto &DDI : DDIV) {
+ const DbgValueInst *DI = DDI.getDI();
+ assert(DI && "Ill-formed DanglingDebugInfo");
+ DebugLoc dl = DDI.getdl();
+ unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
+ unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+ DILocalVariable *Variable = DI->getVariable();
+ DIExpression *Expr = DI->getExpression();
+ assert(Variable->isValidLocationForIntrinsic(dl) &&
+ "Expected inlined-at fields to agree");
+ SDDbgValue *SDV;
+ if (Val.getNode()) {
+ // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
+ // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
+ // we couldn't resolve it directly when examining the DbgValue intrinsic
+ // in the first place we should not be more successful here). Unless we
+      // have some test case that proves this to be correct, we should avoid
+ // calling EmitFuncArgumentDbgValue here.
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
+ LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
+ << DbgSDNodeOrder << "] for:\n " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
+ // Increase the SDNodeOrder for the DbgValue here to make sure it is
+ // inserted after the definition of Val when emitting the instructions
+ // after ISel. An alternative could be to teach
+ // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
+ LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
+ << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
+ << ValSDNodeOrder << "\n");
+ SDV = getDbgValue(Val, Variable, Expr, dl,
+ std::max(DbgSDNodeOrder, ValSDNodeOrder));
+ DAG.AddDbgValue(SDV, Val.getNode(), false);
+ } else
+ LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
+ << "in EmitFuncArgumentDbgValue\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ auto Undef =
+ UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+ auto SDV =
+ DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ }
+ }
+ DDIV.clear();
+}
+
+void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
+ Value *V = DDI.getDI()->getValue();
+ DILocalVariable *Var = DDI.getDI()->getVariable();
+ DIExpression *Expr = DDI.getDI()->getExpression();
+ DebugLoc DL = DDI.getdl();
+ DebugLoc InstDL = DDI.getDI()->getDebugLoc();
+ unsigned SDOrder = DDI.getSDNodeOrder();
+
+ // Currently we consider only dbg.value intrinsics -- we tell the salvager
+ // that DW_OP_stack_value is desired.
+ assert(isa<DbgValueInst>(DDI.getDI()));
+ bool StackValue = true;
+
+  // Can this Value be encoded without any further work?
+ if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder))
+ return;
+
+ // Attempt to salvage back through as many instructions as possible. Bail if
+ // a non-instruction is seen, such as a constant expression or global
+ // variable. FIXME: Further work could recover those too.
+ while (isa<Instruction>(V)) {
+ Instruction &VAsInst = *cast<Instruction>(V);
+ DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue);
+
+ // If we cannot salvage any further, and haven't yet found a suitable debug
+ // expression, bail out.
+ if (!NewExpr)
+ break;
+
+ // New value and expr now represent this debuginfo.
+ V = VAsInst.getOperand(0);
+ Expr = NewExpr;
+
+ // Some kind of simplification occurred: check whether the operand of the
+ // salvaged debug expression can be encoded in this DAG.
+ if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) {
+ LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
+ << DDI.getDI() << "\nBy stripping back to:\n " << V);
+ return;
+ }
+ }
+
+ // This was the final opportunity to salvage this debug information, and it
+ // couldn't be done. Place an undef DBG_VALUE at this location to terminate
+ // any earlier variable location.
+ auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+ auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+
+ LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI()
+ << "\n");
+ LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
+ << "\n");
+}
+
+bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
+ DIExpression *Expr, DebugLoc dl,
+ DebugLoc InstDL, unsigned Order) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDDbgValue *SDV;
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+ isa<ConstantPointerNull>(V)) {
+ SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ return true;
+ }
+
+ // If the Value is a frame index, we can create a FrameIndex debug value
+ // without relying on the DAG at all.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ auto SDV =
+ DAG.getFrameIndexDbgValue(Var, Expr, SI->second,
+ /*IsIndirect*/ false, dl, SDNodeOrder);
+ // Do not attach the SDNodeDbgValue to an SDNode: this variable location
+ // is still available even if the SDNode gets optimized out.
+ DAG.AddDbgValue(SDV, nullptr, false);
+ return true;
+ }
+ }
+
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
+ return true;
+ SDV = getDbgValue(N, Var, Expr, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ return true;
+ }
+
+  // Special rules apply for the first dbg.values of parameter variables in a
+  // function. Identify them by the fact that they reference Argument Values,
+  // that their variables are parameters, and that those parameters belong to
+  // the current function rather than to an inlined callee. We need to let
+  // them dangle until they get an SDNode.
+ bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() &&
+ !InstDL.getInlinedAt();
+ if (!IsParamOfFunc) {
+ // The value is not used in this block yet (or it would have an SDNode).
+ // We still want the value to appear for the user if possible -- if it has
+ // an associated VReg, we can refer to that instead.
+ auto VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ unsigned Reg = VMI->second;
+ // If this is a PHI node, it may be split up into several MI PHI nodes
+ // (in FunctionLoweringInfo::set).
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+ V->getType(), None);
+ if (RFV.occupiesMultipleRegs()) {
+ unsigned Offset = 0;
+ unsigned BitsToDescribe = 0;
+ if (auto VarSize = Var->getSizeInBits())
+ BitsToDescribe = *VarSize;
+ if (auto Fragment = Expr->getFragmentInfo())
+ BitsToDescribe = Fragment->SizeInBits;
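+        // For example (illustrative), an i128 value occupying two 64-bit
+        // registers is described with two fragment expressions: bits
+        // [0, 64) in the first register and bits [64, 128) in the second.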
+ for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ unsigned RegisterSize = RegAndSize.second;
+ // Bail out if all bits are described already.
+ if (Offset >= BitsToDescribe)
+ break;
+ unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
+ ? BitsToDescribe - Offset
+ : RegisterSize;
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, FragmentSize);
+ if (!FragmentExpr)
+ continue;
+ SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first,
+ false, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ Offset += RegisterSize;
+ }
+ } else {
+ SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void SelectionDAGBuilder::resolveOrClearDbgInfo() {
+ // Try to fixup any remaining dangling debug info -- and drop it if we can't.
+ for (auto &Pair : DanglingDebugInfoMap)
+ for (auto &DDI : Pair.second)
+ salvageUnresolvedDbgValue(DDI);
+ clearDanglingDebugInfo();
+}
+
+/// getCopyFromRegs - If there was virtual register allocated for the value V
+/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
+SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ SDValue Result;
+
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), InReg, Ty,
+ None); // This is not an ABI copy.
+ SDValue Chain = DAG.getEntryNode();
+ Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
+ V);
+ resolveDanglingDebugInfo(V, Result);
+ }
+
+ return Result;
+}
+
+/// getValue - Return an SDValue for the given Value.
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+ // If we already have an SDValue for this value, use it. It's important
+ // to do this first, so that we don't create a CopyFromReg if we already
+ // have a regular SDValue.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // If there's a virtual register allocated and initialized for this
+ // value, use it.
+ if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
+ return copyFromReg;
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+// Return true if SDValue exists for the given Value
+bool SelectionDAGBuilder::findValue(const Value *V) const {
+ return (NodeMap.find(V) != NodeMap.end()) ||
+ (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
+}
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+ // If we already have an SDValue for this value, use it.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) {
+ if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
+ // Remove the debug location from the node as the node is about to be used
+ // in a location which may differ from the original debug location. This
+ // is relevant to Constant and ConstantFP nodes because they can appear
+ // as constant expressions inside PHI nodes.
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+ }
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
+/// Create an SDValue for the given value.
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return DAG.getConstant(*CI, getCurSDLoc(), VT);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
+
+ if (isa<ConstantPointerNull>(C)) {
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ return DAG.getConstant(0, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout(), AS));
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
+
+ if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+ return DAG.getUNDEF(VT);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDValue N1 = NodeMap[V];
+ assert(N1.getNode() && "visit didn't populate the NodeMap!");
+ return N1;
+ }
+
+ if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+ SmallVector<SDValue, 4> Constants;
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI) {
+ SDNode *Val = getValue(*OI).getNode();
+ // If the operand is an empty aggregate, there are no values.
+ if (!Val) continue;
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Constants.push_back(SDValue(Val, i));
+ }
+
+ return DAG.getMergeValues(Constants, getCurSDLoc());
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Ops.push_back(SDValue(Val, i));
+ }
+
+ if (isa<ArrayType>(CDS->getType()))
+ return DAG.getMergeValues(Ops, getCurSDLoc());
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ }
+
+ if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
+ assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+ "Unknown struct or array constant!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
+ unsigned NumElts = ValueVTs.size();
+ if (NumElts == 0)
+ return SDValue(); // empty struct
+ SmallVector<SDValue, 4> Constants(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ EVT EltVT = ValueVTs[i];
+ if (isa<UndefValue>(C))
+ Constants[i] = DAG.getUNDEF(EltVT);
+ else if (EltVT.isFloatingPoint())
+ Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
+ else
+ Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
+ }
+
+ return DAG.getMergeValues(Constants, getCurSDLoc());
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return DAG.getBlockAddress(BA, VT);
+
+ VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = VecTy->getNumElements();
+
+ // Now that we know the number and type of the elements, get that number of
+ // elements into the Ops array based on what kind of constant it is.
+ SmallVector<SDValue, 16> Ops;
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CV->getOperand(i)));
+ } else {
+ assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+ EVT EltVT =
+ TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
+
+ SDValue Op;
+ if (EltVT.isFloatingPoint())
+ Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
+ else
+ Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
+ Ops.assign(NumElements, Op);
+ }
+
+ // Create a BUILD_VECTOR node.
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ }
+
+ // If this is a static alloca, generate it as the frameindex instead of
+ // computation.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(SI->second,
+ TLI.getFrameIndexTy(DAG.getDataLayout()));
+ }
+
+ // If this is an instruction which fast-isel has deferred, select it now.
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+
+ RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
+ Inst->getType(), getABIRegCopyCC(V));
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+ }
+
+ llvm_unreachable("Can't get register for value!");
+}
+
+void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
+ bool IsSEH = isAsynchronousEHPersonality(Pers);
+ bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
+ MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
+ if (!IsSEH)
+ CatchPadMBB->setIsEHScopeEntry();
+ // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ CatchPadMBB->setIsEHFuncletEntry();
+ // Wasm does not need catchpads anymore
+ if (!IsWasmCXX)
+ DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
+ getControlRoot()));
+}
+
+void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
+ // Update machine-CFG edge.
+ MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
+ FuncInfo.MBB->addSuccessor(TargetMBB);
+
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsSEH = isAsynchronousEHPersonality(Pers);
+ if (IsSEH) {
+ // If this is not a fall-through branch or optimizations are switched off,
+ // emit the branch.
+ if (TargetMBB != NextBlock(FuncInfo.MBB) ||
+ TM.getOptLevel() == CodeGenOpt::None)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB)));
+ return;
+ }
+
+ // Figure out the funclet membership for the catchret's successor.
+ // This will be used by the FuncletLayout pass to determine how to order the
+ // BB's.
+ // A 'catchret' returns to the outer scope's color.
+ Value *ParentPad = I.getCatchSwitchParentPad();
+ const BasicBlock *SuccessorColor;
+ if (isa<ConstantTokenNone>(ParentPad))
+ SuccessorColor = &FuncInfo.Fn->getEntryBlock();
+ else
+ SuccessorColor = cast<Instruction>(ParentPad)->getParent();
+ assert(SuccessorColor && "No parent funclet for catchret!");
+ MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
+ assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
+
+ // Create the terminator node.
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB),
+ DAG.getBasicBlock(SuccessorColorMBB));
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
+ // Don't emit any special code for the cleanuppad instruction. It just marks
+ // the start of an EH scope/funclet.
+ FuncInfo.MBB->setIsEHScopeEntry();
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ if (Pers != EHPersonality::Wasm_CXX) {
+ FuncInfo.MBB->setIsEHFuncletEntry();
+ FuncInfo.MBB->setIsCleanupFuncletEntry();
+ }
+}
+
+// For wasm, there's always a single catch pad attached to a catchswitch, and
+// the control flow always stops at the single catch pad, as it does for a
+// cleanup pad. In case the exception caught is not of the types the catch pad
+// catches, it will be rethrown by a rethrow.
+static void findWasmUnwindDestinations(
+ FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ break;
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations. We don't
+ // continue to the unwind destination of the catchswitch for wasm.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ }
+ break;
+ } else {
+ continue;
+ }
+ }
+}
+
+/// When an invoke or a cleanupret unwinds to the next EH pad, there are
+/// many places it could ultimately go. In the IR, we have a single unwind
+/// destination, but in the machine CFG, we enumerate all the possible blocks.
+/// This function skips over imaginary basic blocks that hold catchswitch
+/// instructions, and finds all the "real" machine
+/// basic block destinations. As those destinations may not be successors of
+/// EHPadBB, here we also calculate the edge probability to those destinations.
+/// The passed-in Prob is the edge probability to EHPadBB.
+static void findUnwindDestinations(
+ FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ EHPersonality Personality =
+ classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
+ bool IsSEH = isAsynchronousEHPersonality(Personality);
+
+ if (IsWasmCXX) {
+ findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
+ assert(UnwindDests.size() <= 1 &&
+ "There should be at most one unwind destination for wasm");
+ return;
+ }
+
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ BasicBlock *NewEHPadBB = nullptr;
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ break;
+ } else if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ break;
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+ // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsSEH)
+ UnwindDests.back().first->setIsEHScopeEntry();
+ }
+ NewEHPadBB = CatchSwitch->getUnwindDest();
+ } else {
+ continue;
+ }
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI && NewEHPadBB)
+ Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+ EHPadBB = NewEHPadBB;
+ }
+}
+
+void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
+ // Update successor info.
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ auto UnwindDest = I.getUnwindDest();
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability UnwindDestProb =
+ (BPI && UnwindDest)
+ ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
+ }
+ FuncInfo.MBB->normalizeSuccProbs();
+
+ // Create the terminator node.
+ SDValue Ret =
+ DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
+ report_fatal_error("visitCatchSwitch not yet implemented!");
+}
+
+void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto &DL = DAG.getDataLayout();
+ SDValue Chain = getControlRoot();
+ SmallVector<ISD::OutputArg, 8> Outs;
+ SmallVector<SDValue, 8> OutVals;
+
+ // Calls to @llvm.experimental.deoptimize don't generate a return value, so
+ // lower
+ //
+ // %val = call <ty> @llvm.experimental.deoptimize()
+ // ret <ty> %val
+ //
+ // differently.
+ if (I.getParent()->getTerminatingDeoptimizeCall()) {
+ LowerDeoptimizingReturn();
+ return;
+ }
+
+ if (!FuncInfo.CanLowerReturn) {
+ unsigned DemoteReg = FuncInfo.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, DL,
+ F->getReturnType()->getPointerTo(
+ DAG.getDataLayout().getAllocaAddrSpace()),
+ PtrValueVTs);
+
+ SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
+ &Offsets);
+ unsigned NumValues = ValueVTs.size();
+
+ SmallVector<SDValue, 4> Chains(NumValues);
+ for (unsigned i = 0; i != NumValues; ++i) {
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
+
+ SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
+ if (MemVTs[i] != ValueVTs[i])
+ Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
+ Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
+ // FIXME: better loc info would be nice.
+ Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ }
+
+ Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
+ MVT::Other, Chains);
+ } else if (I.getNumOperands() != 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues) {
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ const Function *F = I.getParent()->getParent();
+
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ I.getOperand(0)->getType(), F->getCallingConv(),
+ /*IsVarArg*/ false);
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
+ Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
+ Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ LLVMContext &Context = F->getContext();
+ bool RetInReg = F->getAttributes().hasAttribute(
+ AttributeList::ReturnIndex, Attribute::InReg);
+
+ for (unsigned j = 0; j != NumValues; ++j) {
+ EVT VT = ValueVTs[j];
+
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+ VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
+
+ CallingConv::ID CC = F->getCallingConv();
+
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
+ MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurSDLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (RetInReg)
+ Flags.setInReg();
+
+ if (I.getOperand(0)->getType()->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
+ }
+
+ if (NeedsRegBlock) {
+ Flags.setInConsecutiveRegs();
+ if (j == NumValues - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
+
+ // Propagate extension type if any
+ if (ExtendKind == ISD::SIGN_EXTEND)
+ Flags.setSExt();
+ else if (ExtendKind == ISD::ZERO_EXTEND)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ VT, /*isfixed=*/true, 0, 0));
+ OutVals.push_back(Parts[i]);
+ }
+ }
+ }
+ }
+
+ // Push the swifterror virtual register as the last element of Outs. This
+ // makes sure the swifterror virtual register will be returned in the
+ // swifterror physical register.
+ const Function *F = I.getParent()->getParent();
+ if (TLI.supportSwiftError() &&
+ F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
+ assert(SwiftError.getFunctionArg() && "Need a swift error argument");
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ Flags.setSwiftError();
+ Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
+ EVT(TLI.getPointerTy(DL)) /*argvt*/,
+ true /*isfixed*/, 1 /*origidx*/,
+ 0 /*partOffs*/));
+ // Create SDNode for the swifterror virtual register.
+ OutVals.push_back(
+ DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
+ &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
+ EVT(TLI.getPointerTy(DL))));
+ }
+
+ bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
+ CallingConv::ID CallConv =
+ DAG.getMachineFunction().getFunction().getCallingConv();
+ Chain = DAG.getTargetLoweringInfo().LowerReturn(
+ Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
+
+ // Verify that the target's LowerReturn behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerReturn didn't return a valid chain!");
+
+ // Update the DAG with the new chain value resulting from return lowering.
+ DAG.setRoot(Chain);
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ return;
+
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ assert(!V->use_empty() && "Unused value assigned virtual registers!");
+ CopyValueToVirtualRegister(V, VMI->second);
+ }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (const Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB == &FromBB->getParent()->getEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
+BranchProbability
+SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ const BasicBlock *SrcBB = Src->getBasicBlock();
+ const BasicBlock *DstBB = Dst->getBasicBlock();
+ if (!BPI) {
+ // If BPI is not available, set the default probability to 1 / N, where N
+ // is the number of successors.
+ auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
+ return BranchProbability(1, SuccSize);
+ }
+ return BPI->getEdgeProbability(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst,
+ BranchProbability Prob) {
+ if (!FuncInfo.BPI)
+ Src->addSuccessorWithoutProb(Dst);
+ else {
+ if (Prob.isUnknown())
+ Prob = getEdgeProbability(Src, Dst);
+ Src->addSuccessor(Dst, Prob);
+ }
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ BranchProbability TProb,
+ BranchProbability FProb,
+ bool InvertCond) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ if (CurBB == SwitchBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+ ISD::CondCode Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ ICmpInst::Predicate Pred =
+ InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+ Condition = getICmpCondCode(Pred);
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
+ FCmpInst::Predicate Pred =
+ InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+ Condition = getFCmpCondCode(Pred);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ }
+
+ CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
+ TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+ return;
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
+ CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+}
+
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc,
+ BranchProbability TProb,
+ BranchProbability FProb,
+ bool InvertCond) {
+ // Skip over values that are not part of the tree, and remember to invert
+ // the op and operands at the next level.
+ Value *NotCond;
+ if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+ InBlock(NotCond, CurBB->getBasicBlock())) {
+ FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
+ }
+
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ // Compute the effective opcode for Cond, taking into account whether it needs
+ // to be inverted, e.g.
+ // and (not (or A, B)), C
+ // gets lowered as
+ // and (and (not A, not B), C)
+ unsigned BOpc = 0;
+ if (BOp) {
+ BOpc = BOp->getOpcode();
+ if (InvertCond) {
+ if (BOpc == Instruction::And)
+ BOpc = Instruction::Or;
+ else if (BOpc == Instruction::Or)
+ BOpc = Instruction::And;
+ }
+ }
+
+ // If this node is not part of the or/and tree, emit it as a branch.
+ if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+ BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
+ BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+ !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
+ TProb, FProb, InvertCond);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI(CurBB);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+ // A/(1+B) and 2B/(1+B). This choice assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+ // TmpBB, but the math is more complicated.
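+ // As a concrete check of the first choice: if A = 1/2 and B = 1/2, BB1
+ // gets probabilities 1/4 and 3/4 and TmpBB gets 1/3 and 2/3, so the total
+ // TrueProb is 1/4 + 3/4 * 1/3 = 1/2 = A, as required.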
+
+ auto NewTrueProb = TProb / 2;
+ auto NewFalseProb = TProb / 2 + FProb;
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
+ NewTrueProb, NewFalseProb, InvertCond);
+
+ // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+ SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+ Probs[0], Probs[1], InvertCond);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+ // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+ // TrueProb for BB1 * FalseProb for TmpBB.
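+ // As a concrete check: if A = 3/4 and B = 1/4, BB1 gets probabilities 7/8
+ // and 1/8 and TmpBB gets 6/7 and 1/7, so the total FalseProb is
+ // 1/8 + 7/8 * 1/7 = 1/4 = B, as required.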
+
+ auto NewTrueProb = TProb + FProb / 2;
+ auto NewFalseProb = FProb / 2;
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
+ NewTrueProb, NewFalseProb, InvertCond);
+
+ // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+ SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+ Probs[0], Probs[1], InvertCond);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].CC == Cases[1].CC &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+ MachineBasicBlock *BrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ if (I.isUnconditional()) {
+ // Update machine-CFG edges.
+ BrMBB->addSuccessor(Succ0MBB);
+
+ // If this is not a fall-through branch or optimizations are switched off,
+ // emit the branch.
+ if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Succ0MBB)));
+
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ const Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive, this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ Instruction::BinaryOps Opcode = BOp->getOpcode();
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
+ !I.hasMetadata(LLVMContext::MD_unpredictable) &&
+ (Opcode == Instruction::And || Opcode == Instruction::Or)) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
+ Opcode,
+ getEdgeProbability(BrMBB, Succ0MBB),
+ getEdgeProbability(BrMBB, Succ1MBB),
+ /*InvertCond=*/false);
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SL->SwitchCases)) {
+ for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SL->SwitchCases[0], BrMBB);
+ SL->SwitchCases.erase(SL->SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
+ FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
+
+ SL->SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
+ nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
+
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB, BrMBB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB) {
+ SDValue Cond;
+ SDValue CondLHS = getValue(CB.CmpLHS);
+ SDLoc dl = CB.DL;
+
+ if (CB.CC == ISD::SETTRUE) {
+ // Branch or fall through to TrueBB.
+ addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
+ SwitchBB->normalizeSuccProbs();
+ if (CB.TrueBB != NextBlock(SwitchBB)) {
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(CB.TrueBB)));
+ }
+ return;
+ }
+
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
+
+ // Build the setcc now.
+ if (!CB.CmpMHS) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ) {
+ SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+ } else {
+ SDValue CondRHS = getValue(CB.CmpRHS);
+
+ // If a pointer's DAG type is larger than its memory type then the DAG
+ // values are zero-extended. This breaks signed comparisons so truncate
+ // back to the underlying type before doing the compare.
+ if (CondLHS.getValueType() != MemVT) {
+ CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
+ CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
+ }
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
+ }
+ } else {
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ SDValue CmpOp = getValue(CB.CmpMHS);
+ EVT VT = CmpOp.getValueType();
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
+ ISD::SETLE);
+ } else {
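+ // Lower Low <= X <= High as the single unsigned comparison
+ // (X - Low) <= (High - Low): subtracting Low rebases the range at zero,
+ // and any X below Low wraps around to a large unsigned value.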
+ SDValue SUB = DAG.getNode(ISD::SUB, dl,
+ VT, CmpOp, DAG.getConstant(Low, dl, VT));
+ Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+ DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
+ }
+ }
+
+ // Update successor info
+ addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
+ SwitchBB->normalizeSuccProbs();
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock(SwitchBB)) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+ }
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+
+ // Insert the false branch. Do this even if it's a fall through branch,
+ // this makes it easier to do DAG optimizations which require inverting
+ // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
+ JT.Reg, PTy);
+ SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+ SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
+ MVT::Other, Index.getValue(1),
+ Table, Index);
+ DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits the necessary code to produce
+/// the index into the JumpTable from the switch case value.
+void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
+ JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+
+ // Subtract the lowest switch case value from the value being switched on.
+ SDValue SwitchOp = getValue(JTH.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
+ DAG.getConstant(JTH.First, dl, VT));
+
+ // The SDNode we just created, which holds the value being switched on minus
+ // the smallest case value, needs to be copied to a virtual register so it
+ // can be used as an index into the jump table in a subsequent basic block.
+ // This value may be smaller or larger than the target's pointer type, and
+ // may therefore require extension or truncation.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
+
+ unsigned JumpTableReg =
+ FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
+ JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ if (!JTH.OmitRangeCheck) {
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the
+ // largest case in the switch.
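+ // For example, with cases 10..14 the index is Sub = X - 10 and we branch
+ // to the default block when Sub > 4; the comparison is unsigned, so values
+ // of X below 10 also fail the check.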
+ SDValue CMP = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB));
+
+ DAG.setRoot(BrCond);
+ } else {
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
+ DAG.getBasicBlock(JT.MBB)));
+ else
+ DAG.setRoot(CopyTo);
+ }
+}
+
+/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
+/// variable if there exists one.
+static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue &Chain) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
+ MachineFunction &MF = DAG.getMachineFunction();
+ Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
+ MachineSDNode *Node =
+ DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
+ if (Global) {
+ MachinePointerInfo MPInfo(Global);
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable;
+ MachineMemOperand *MemRef = MF.getMachineMemOperand(
+ MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
+ DAG.setNodeMemRefs(Node, {MemRef});
+ }
+ if (PtrTy != PtrMemTy)
+ return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
+ return SDValue(Node, 0);
+}
+
+/// Codegen a new tail for a stack protector check ParentMBB which has had its
+/// tail spliced into a stack protector check success bb.
+///
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB) {
+
+ // First create the loads to the guard/stack slot for the comparison.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
+
+ MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI.getStackProtectorIndex();
+
+ SDValue Guard;
+ SDLoc dl = getCurSDLoc();
+ SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
+ const Module &M = *ParentBB->getParent()->getFunction().getParent();
+ unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
+
+ // Generate code to load the content of the guard slot.
+ SDValue GuardVal = DAG.getLoad(
+ PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
+ MachineMemOperand::MOVolatile);
+
+ if (TLI.useStackGuardXorFP())
+ GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
+
+ // Retrieve guard check function, nullptr if instrumentation is inlined.
+ if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
+ // The target provides a guard check function to validate the guard value.
+ // Generate a call to that function with the content of the guard slot as
+ // argument.
+ FunctionType *FnTy = GuardCheckFn->getFunctionType();
+ assert(FnTy->getNumParams() == 1 && "Invalid function signature");
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = GuardVal;
+ Entry.Ty = FnTy->getParamType(0);
+ if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
+ Entry.IsInReg = true;
+ Args.push_back(Entry);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(DAG.getEntryNode())
+ .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
+ getValue(GuardCheckFn), std::move(Args));
+
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+ DAG.setRoot(Result.second);
+ return;
+ }
+
+ // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
+ // Otherwise, emit a volatile load to retrieve the stack guard value.
+ SDValue Chain = DAG.getEntryNode();
+ if (TLI.useLoadStackGuardNode()) {
+ Guard = getLoadStackGuard(DAG, dl, Chain);
+ } else {
+ const Value *IRGuard = TLI.getSDagStackGuard(M);
+ SDValue GuardPtr = getValue(IRGuard);
+
+ Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
+ MachinePointerInfo(IRGuard, 0), Align,
+ MachineMemOperand::MOVolatile);
+ }
+
+ // Perform the comparison via a subtract/getsetcc.
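+ // (Guard - GuardVal) is zero iff the two values are equal, so the SETNE
+ // against zero below implements the inequality test.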
+ EVT VT = Guard.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
+
+ SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
+
+ // If the sub is not 0, then we know the guard/stackslot do not equal, so
+ // branch to failure MBB.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, GuardVal.getOperand(0),
+ Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
+ // Otherwise branch to success MBB.
+ SDValue Br = DAG.getNode(ISD::BR, dl,
+ MVT::Other, BrCond,
+ DAG.getBasicBlock(SPD.getSuccessMBB()));
+
+ DAG.setRoot(Br);
+}
+
+/// Codegen the failure basic block for a stack protector check.
+///
+/// A failure stack protector machine basic block consists simply of a call to
+/// __stack_chk_fail().
+///
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void
+SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setDiscardResult(true);
+ SDValue Chain =
+ TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
+ None, CallOptions, getCurSDLoc()).second;
+ // On PS4, the "return address" must still be within the calling function,
+ // even if it's at the very end, so emit an explicit TRAP here.
+ // Passing 'true' for doesNotReturn above won't generate the trap for us.
+ if (TM.getTargetTriple().isPS4CPU())
+ Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
+
+ DAG.setRoot(Chain);
+}
+
+/// visitBitTestHeader - This function emits the necessary code to produce a
+/// value suitable for "bit tests".
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+
+ // Subtract the minimum value.
+ SDValue SwitchOp = getValue(B.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue RangeSub =
+ DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
+
+ // Determine the type of the test operands.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT)) {
+ UsePtrType = true;
+ } else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
+ // Switch table case ranges are encoded into a series of masks.
+ // Just use the pointer type; it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ SDValue Sub = RangeSub;
+ if (UsePtrType) {
+ VT = TLI.getPointerTy(DAG.getDataLayout());
+ Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
+ }
+
+ B.RegVT = VT.getSimpleVT();
+ B.Reg = FuncInfo.CreateReg(B.RegVT);
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+ if (!B.OmitRangeCheck)
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+ SwitchBB->normalizeSuccProbs();
+
+ SDValue Root = CopyTo;
+ if (!B.OmitRangeCheck) {
+ // Conditional branch to the default block.
+ SDValue RangeCmp = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ RangeSub.getValueType()),
+ RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
+ ISD::SETUGT);
+
+ Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+ }
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (MBB != NextBlock(SwitchBB))
+ Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
+
+ DAG.setRoot(Root);
+}
+
+/// visitBitTestCase - This function produces one "bit test".
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ BranchProbability BranchProbToNext,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+ MVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
+ SDValue Cmp;
+ unsigned PopCount = countPopulation(B.Mask);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
+ Cmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
+ ISD::SETEQ);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
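+ // e.g. with Range = 4 and Mask = 0b11011, the only zero bit is at position
+ // countTrailingOnes(Mask) = 2, so jump to the target unless the (rebased)
+ // switch value equals 2.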
+ Cmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
+ ISD::SETNE);
+ } else {
+ // Make the desired shift: materialize (1 << ShiftOp) so the bit test
+ // below reduces to ((1 << ShiftOp) & Mask) != 0.
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
+ DAG.getConstant(1, dl, VT), ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, dl,
+ VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
+ Cmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
+ }
+
+ // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+ addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+ // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+ addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+ // The sum of B.ExtraProb and BranchProbToNext is not guaranteed to be one,
+ // as they are relative probabilities (and thus work more like weights), so
+ // we need to normalize them to make them sum to one.
+ SwitchBB->normalizeSuccProbs();
+
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(),
+ Cmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (NextMBB != NextBlock(SwitchBB))
+ BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB));
+
+ DAG.setRoot(BrAnd);
+}
+
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
+
+ // Retrieve successors. Look through artificial IR level blocks like
+ // catchswitch for successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ const BasicBlock *EHPadBB = I.getSuccessor(1);
+
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ "Cannot lower invokes with arbitrary operand bundles yet!");
+
+ const Value *Callee(I.getCalledValue());
+ const Function *Fn = dyn_cast<Function>(Callee);
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(&I);
+ else if (Fn && Fn->isIntrinsic()) {
+ switch (Fn->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Cannot invoke this intrinsic");
+ case Intrinsic::donothing:
+ // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ visitPatchpoint(&I, EHPadBB);
+ break;
+ case Intrinsic::experimental_gc_statepoint:
+ LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
+ break;
+ case Intrinsic::wasm_rethrow_in_catch: {
+ // This is usually done in visitTargetIntrinsic, but this intrinsic is
+ // special because it can be invoked, so we manually lower it to a DAG
+ // node here.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(getRoot()); // inchain
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Ops.push_back(
+ DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout())));
+ SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
+ DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
+ break;
+ }
+ }
+ } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
+ // Currently we do not lower any intrinsic calls with deopt operand bundles.
+ // Eventually we will support lowering the @llvm.experimental.deoptimize
+ // intrinsic, and right now there are no plans to support other intrinsics
+ // with deopt state.
+ LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
+ } else {
+ LowerCallTo(&I, getValue(Callee), false, EHPadBB);
+ }
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ // We already took care of the exported value for the statepoint instruction
+ // during call to the LowerStatepoint.
+ if (!isStatepoint(I)) {
+ CopyToExportRegsIfNeeded(&I);
+ }
+
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability EHPadBBProb =
+ BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
+
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, Return);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+ }
+ InvokeMBB->normalizeSuccProbs();
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
+ MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
+
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ "Cannot lower callbrs with arbitrary operand bundles yet!");
+
+ assert(isa<InlineAsm>(I.getCalledValue()) &&
+ "Only know how to handle inlineasm callbr");
+ visitInlineAsm(&I);
+
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
+
+ // Update successor info.
+ addSuccessorWithProb(CallBrMBB, Return);
+ for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
+ MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
+ addSuccessorWithProb(CallBrMBB, Target);
+ }
+ CallBrMBB->normalizeSuccProbs();
+
+ // Drop into default successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+ assert(FuncInfo.MBB->isEHPad() &&
+ "Call to landingpad not in landing pad!");
+
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother to create these DAG nodes.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
+ if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
+ TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
+ return;
+
+ // If landingpad's return type is token type, we don't create DAG nodes
+ // for its exception pointer and selector value. The extraction of exception
+ // pointer or selector value from token type landingpads is not currently
+ // supported.
+ if (LP.getType()->isTokenTy())
+ return;
+
+ SmallVector<EVT, 2> ValueVTs;
+ SDLoc dl = getCurSDLoc();
+ ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
+ assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
+
+ // Get the two live-in registers as SDValues. The physregs have already been
+ // copied into virtual registers.
+ SDValue Ops[2];
+ if (FuncInfo.ExceptionPointerVirtReg) {
+ Ops[0] = DAG.getZExtOrTrunc(
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ FuncInfo.ExceptionPointerVirtReg,
+ TLI.getPointerTy(DAG.getDataLayout())),
+ dl, ValueVTs[0]);
+ } else {
+ Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ Ops[1] = DAG.getZExtOrTrunc(
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ FuncInfo.ExceptionSelectorVirtReg,
+ TLI.getPointerTy(DAG.getDataLayout())),
+ dl, ValueVTs[1]);
+
+ // Merge into one.
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(ValueVTs), Ops);
+ setValue(&LP, Res);
+}
+
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i)
+ if (SL->JTCases[i].first.HeaderBB == First)
+ SL->JTCases[i].first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i)
+ if (SL->BitTestCases[i].Parent == First)
+ SL->BitTestCases[i].Parent = Last;
+}
+
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges with unique successors.
+ SmallSet<BasicBlock*, 32> Done;
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
+ BasicBlock *BB = I.getSuccessor(i);
+ bool Inserted = Done.insert(BB).second;
+ if (!Inserted)
+ continue;
+
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
+ addSuccessorWithProb(IndirectBrMBB, Succ);
+ }
+ IndirectBrMBB->normalizeSuccProbs();
+
+ DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
+ MVT::Other, getControlRoot(),
+ getValue(I.getAddress())));
+}
+
+void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
+ if (!DAG.getTarget().Options.TrapUnreachable)
+ return;
+
+ // We may be able to ignore unreachable behind a noreturn call.
+ if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
+ const BasicBlock &BB = *I.getParent();
+ if (&I != &BB.front()) {
+ BasicBlock::const_iterator PredI =
+ std::prev(BasicBlock::const_iterator(&I));
+ if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
+ if (Call->doesNotReturn())
+ return;
+ }
+ }
+ }
+
+ DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+}
+
+void SelectionDAGBuilder::visitFSub(const User &I) {
+ // -0.0 - X --> fneg
+ Type *Ty = I.getType();
+ if (isa<Constant>(I.getOperand(0)) &&
+ I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+
+ visitBinary(I, ISD::FSUB);
+}
+
+/// Checks if the given instruction performs a vector reduction, in which case
+/// we have the freedom to alter the elements in the result as long as their
+/// reduction stays unchanged.
+static bool isVectorReductionOp(const User *I) {
+ const Instruction *Inst = dyn_cast<Instruction>(I);
+ if (!Inst || !Inst->getType()->isVectorTy())
+ return false;
+
+ auto OpCode = Inst->getOpcode();
+ switch (OpCode) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ break;
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
+ if (FPOp->getFastMathFlags().isFast())
+ break;
+ LLVM_FALLTHROUGH;
+ default:
+ return false;
+ }
+
+ unsigned ElemNum = Inst->getType()->getVectorNumElements();
+ // Ensure the reduction size is a power of 2.
+ if (!isPowerOf2_32(ElemNum))
+ return false;
+
+ unsigned ElemNumToReduce = ElemNum;
+
+ // Do DFS search on the def-use chain from the given instruction. We only
+ // allow four kinds of operations during the search until we reach the
+ // instruction that extracts the first element from the vector:
+ //
+ // 1. The reduction operation of the same opcode as the given instruction.
+ //
+ // 2. PHI node.
+ //
+ // 3. ShuffleVector instruction together with a reduction operation that
+ // does a partial reduction.
+ //
+ // 4. ExtractElement that extracts the first element from the vector, and we
+ // stop searching the def-use chain here.
+ //
+ // 3 & 4 above perform a reduction on all elements of the vector. We push defs
+ // from 1-3 to the stack to continue the DFS. The given instruction is not
+ // a reduction operation if we encounter any instruction other than those
+ // listed above.
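+ //
+ // For illustration only (the value names below are made up), a 4-wide
+ // fast-math fadd reduction that this matcher accepts typically looks like:
+ //
+ //   %rdx.shuf = shufflevector <4 x float> %v, <4 x float> undef,
+ //                             <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ //   %bin.rdx = fadd fast <4 x float> %v, %rdx.shuf
+ //   %rdx.shuf1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
+ //                              <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ //   %bin.rdx1 = fadd fast <4 x float> %bin.rdx, %rdx.shuf1
+ //   %res = extractelement <4 x float> %bin.rdx1, i32 0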
+
+ SmallVector<const User *, 16> UsersToVisit{Inst};
+ SmallPtrSet<const User *, 16> Visited;
+ bool ReduxExtracted = false;
+
+ while (!UsersToVisit.empty()) {
+ auto User = UsersToVisit.back();
+ UsersToVisit.pop_back();
+ if (!Visited.insert(User).second)
+ continue;
+
+ for (const auto &U : User->users()) {
+ auto Inst = dyn_cast<Instruction>(U);
+ if (!Inst)
+ return false;
+
+ if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
+ if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
+ if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
+ return false;
+ UsersToVisit.push_back(U);
+ } else if (const ShuffleVectorInst *ShufInst =
+ dyn_cast<ShuffleVectorInst>(U)) {
+ // Detect the following pattern: a ShuffleVector instruction together
+ // with a reduction operation that does a partial reduction over the
+ // first and second ElemNumToReduce / 2 halves of the vector, storing
+ // the result in the first ElemNumToReduce / 2 elements of another vector.
+
+ unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
+ if (ResultElements < ElemNum)
+ return false;
+
+ if (ElemNumToReduce == 1)
+ return false;
+ if (!isa<UndefValue>(U->getOperand(1)))
+ return false;
+ for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
+ if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
+ return false;
+ for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
+ if (ShufInst->getMaskValue(i) != -1)
+ return false;
+
+ // There is only one user of this ShuffleVector instruction, which
+ // must be a reduction operation.
+ if (!U->hasOneUse())
+ return false;
+
+ auto U2 = dyn_cast<Instruction>(*U->user_begin());
+ if (!U2 || U2->getOpcode() != OpCode)
+ return false;
+
+ // Check operands of the reduction operation.
+ if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
+ (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
+ UsersToVisit.push_back(U2);
+ ElemNumToReduce /= 2;
+ } else
+ return false;
+ } else if (isa<ExtractElementInst>(U)) {
+ // At this moment we should have reduced all elements in the vector.
+ if (ElemNumToReduce != 1)
+ return false;
+
+ const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
+ if (!Val || !Val->isZero())
+ return false;
+
+ ReduxExtracted = true;
+ } else
+ return false;
+ }
+ }
+ return ReduxExtracted;
+}
+
+void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
+ SDNodeFlags Flags;
+
+ SDValue Op = getValue(I.getOperand(0));
+ SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
+ Op, Flags);
+ setValue(&I, UnNodeValue);
+}
+
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
+ SDNodeFlags Flags;
+ if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
+ Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
+ Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
+ }
+ if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
+ Flags.setExact(ExactOp->isExact());
+ }
+ if (isVectorReductionOp(&I)) {
+ Flags.setVectorReduction(true);
+ LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
+ }
+
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
+ Op1, Op2, Flags);
+ setValue(&I, BinNodeValue);
+}
+
+void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
+ Op1.getValueType(), DAG.getDataLayout());
+
+ // Coerce the shift amount to the right type if we can.
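+ // For example, on a target whose shift-amount type is i8 (such as X86), an
+ // i32 shift amount is truncated here, while an amount narrower than i8
+ // would be zero-extended.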
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ unsigned ShiftSize = ShiftTy.getSizeInBits();
+ unsigned Op2Size = Op2.getValueSizeInBits();
+ SDLoc DL = getCurSDLoc();
+
+ // If the operand is smaller than the shift count type, promote it.
+ if (ShiftSize > Op2Size)
+ Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
+ // If the operand is larger than the shift count type but the shift
+ // count type has enough bits to represent any shift value, truncate
+ // it now. This is a common case and it exposes the truncate to
+ // optimization early.
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+ // Otherwise we'll need to temporarily settle for some other convenient
+ // type. Type legalization will make adjustments once the shiftee is split.
+ else
+ Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
+ }
+
+ bool nuw = false;
+ bool nsw = false;
+ bool exact = false;
+
+ if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
+
+ if (const OverflowingBinaryOperator *OFBinOp =
+ dyn_cast<const OverflowingBinaryOperator>(&I)) {
+ nuw = OFBinOp->hasNoUnsignedWrap();
+ nsw = OFBinOp->hasNoSignedWrap();
+ }
+ if (const PossiblyExactOperator *ExactOp =
+ dyn_cast<const PossiblyExactOperator>(&I))
+ exact = ExactOp->isExact();
+ }
+ SDNodeFlags Flags;
+ Flags.setExact(exact);
+ Flags.setNoSignedWrap(nsw);
+ Flags.setNoUnsignedWrap(nuw);
+ SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
+ Flags);
+ setValue(&I, Res);
+}
+
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ SDNodeFlags Flags;
+ Flags.setExact(isa<PossiblyExactOperator>(&I) &&
+ cast<PossiblyExactOperator>(&I)->isExact());
+ setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
+ Op2, Flags));
+}
+
+void SelectionDAGBuilder::visitICmp(const User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT MemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+
+ // If a pointer's DAG type is larger than its memory type then the DAG values
+ // are zero-extended. This breaks signed comparisons so truncate back to the
+ // underlying type before doing the compare.
+ if (Op1.getValueType() != MemVT) {
+ Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
+ Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
+ }
+
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(const User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ auto *FPMO = dyn_cast<FPMathOperator>(&I);
+ if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
+}
+
+// Check whether the condition of the select is used only by select
+// instructions, in which case the compare feeding it can be removed once the
+// select is converted to a min/max node.
+static bool hasOnlySelectUsers(const Value *Cond) {
+ return llvm::all_of(Cond->users(), [](const Value *V) {
+ return isa<SelectInst>(V);
+ });
+}
+
+void SelectionDAGBuilder::visitSelect(const User &I) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
+ ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Cond = getValue(I.getOperand(0));
+ SDValue LHSVal = getValue(I.getOperand(1));
+ SDValue RHSVal = getValue(I.getOperand(2));
+ auto BaseOps = {Cond};
+ ISD::NodeType OpCode = Cond.getValueType().isVector() ?
+ ISD::VSELECT : ISD::SELECT;
+
+ bool IsUnaryAbs = false;
+
+ // Min/max matching is only viable if all output VTs are the same.
+ if (is_splat(ValueVTs)) {
+ EVT VT = ValueVTs[0];
+ LLVMContext &Ctx = *DAG.getContext();
+ auto &TLI = DAG.getTargetLoweringInfo();
+
+ // We care about the legality of the operation after it has been type
+ // legalized.
+ while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
+ VT = TLI.getTypeToTransformTo(Ctx, VT);
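+    // (For example, on a target that promotes i8 to i32, min/max legality for
+    // a select of i8 values is queried at i32.)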
+
+ // If the vselect is legal, assume we want to leave this as a vector setcc +
+ // vselect. Otherwise, if this is going to be scalarized, we want to see if
+ // min/max is legal on the scalar type.
+ bool UseScalarMinMax = VT.isVector() &&
+ !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
+
+ Value *LHS, *RHS;
+ auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
+ ISD::NodeType Opc = ISD::DELETED_NODE;
+ switch (SPR.Flavor) {
+ case SPF_UMAX: Opc = ISD::UMAX; break;
+ case SPF_UMIN: Opc = ISD::UMIN; break;
+ case SPF_SMAX: Opc = ISD::SMAX; break;
+ case SPF_SMIN: Opc = ISD::SMIN; break;
+ case SPF_FMINNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_ANY: {
+ if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
+ Opc = ISD::FMINNUM;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
+ Opc = ISD::FMINIMUM;
+ else if (UseScalarMinMax)
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
+ ISD::FMINNUM : ISD::FMINIMUM;
+ break;
+ }
+ }
+ break;
+ case SPF_FMAXNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+      case SPNB_RETURNS_ANY:
+ if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
+ Opc = ISD::FMAXNUM;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
+ Opc = ISD::FMAXIMUM;
+ else if (UseScalarMinMax)
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
+ ISD::FMAXNUM : ISD::FMAXIMUM;
+ break;
+ }
+ break;
+ case SPF_ABS:
+ IsUnaryAbs = true;
+ Opc = ISD::ABS;
+ break;
+ case SPF_NABS:
+ // TODO: we need to produce sub(0, abs(X)).
+ default: break;
+ }
+
+ if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
+ (TLI.isOperationLegalOrCustom(Opc, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
+ // If the underlying comparison instruction is used by any other
+ // instruction, the consumed instructions won't be destroyed, so it is
+ // not profitable to convert to a min/max.
+ hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
+ OpCode = Opc;
+ LHSVal = getValue(LHS);
+ RHSVal = getValue(RHS);
+ BaseOps = {};
+ }
+
+ if (IsUnaryAbs) {
+ OpCode = Opc;
+ LHSVal = getValue(LHS);
+ BaseOps = {};
+ }
+ }
+
+ if (IsUnaryAbs) {
+ for (unsigned i = 0; i != NumValues; ++i) {
+ Values[i] =
+ DAG.getNode(OpCode, getCurSDLoc(),
+ LHSVal.getNode()->getValueType(LHSVal.getResNo() + i),
+ SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ }
+ } else {
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
+ Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
+ Values[i] = DAG.getNode(
+ OpCode, getCurSDLoc(),
+ LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops);
+ }
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitTrunc(const User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitZExt(const User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason. Nothing to do.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(const User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason. Nothing to do.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ SDLoc dl = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
+ DAG.getTargetConstant(
+ 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+}
+
+void SelectionDAGBuilder::visitFPExt(const User &I) {
+ // FPExt is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(const User &I) {
+ // SIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ EVT PtrMemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+ N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+ N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitBitCast(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ SDLoc dl = getCurSDLoc();
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+
+ // BitCast assures us that source and destination are the same size so this is
+ // either a BITCAST or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BITCAST, dl,
+ DestVT, N)); // convert types.
+ // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
+ // might fold any kind of constant expression to an integer constant and that
+ // is not what we are looking for. Only recognize a bitcast of a genuine
+ // constant integer as an opaque constant.
+  else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
+    setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
+                                 /*isOpaque=*/true));
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Value *SV = I.getOperand(0);
+ SDValue N = getValue(SV);
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ unsigned SrcAS = SV->getType()->getPointerAddressSpace();
+ unsigned DestAS = I.getType()->getPointerAddressSpace();
+
+ if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InVal = getValue(I.getOperand(1));
+ SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ InVec, InIdx));
+}
+
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
+ SDValue Src1 = getValue(I.getOperand(0));
+ SDValue Src2 = getValue(I.getOperand(1));
+ Constant *MaskV = cast<Constant>(I.getOperand(2));
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT SrcVT = Src1.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+ if (MaskV->isNullValue() && VT.isScalableVector()) {
+ // Canonical splat form of first element of first input vector.
+ SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ SrcVT.getScalarType(), Src1,
+ DAG.getConstant(0, DL,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
+ return;
+ }
+
+ // For now, we only handle splats for scalable vectors.
+ // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
+ // for targets that support a SPLAT_VECTOR for non-scalable vector types.
+ assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
+
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(MaskV, Mask);
+ unsigned MaskNumElts = Mask.size();
+
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
+ return;
+ }
+
+ // Normalize the shuffle vector since mask and vector length don't match.
+ if (SrcNumElts < MaskNumElts) {
+    // Mask is longer than the source vectors. Use CONCAT_VECTORS to make the
+    // mask and vector lengths match.
+
+ if (MaskNumElts % SrcNumElts == 0) {
+ // Mask length is a multiple of the source vector length.
+ // Check if the shuffle is some kind of concatenation of the input
+ // vectors.
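+      // For example, with two <4 x i32> sources, mask <0,1,2,3,4,5,6,7> is
+      // concat(Src1, Src2) and mask <4,5,6,7,0,1,2,3> is concat(Src2, Src1).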
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool IsConcat = true;
+ SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ continue;
+ // Ensure the indices in each SrcVT sized piece are sequential and that
+ // the same source is used for the whole piece.
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+ (ConcatSrcs[i / SrcNumElts] >= 0 &&
+ ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
+ IsConcat = false;
+ break;
+ }
+ // Remember which source this index came from.
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
+ }
+
+ // The shuffle is concatenating multiple vectors together. Just emit
+ // a CONCAT_VECTORS operation.
+ if (IsConcat) {
+ SmallVector<SDValue, 8> ConcatOps;
+ for (auto Src : ConcatSrcs) {
+ if (Src < 0)
+ ConcatOps.push_back(DAG.getUNDEF(SrcVT));
+ else if (Src == 0)
+ ConcatOps.push_back(Src1);
+ else
+ ConcatOps.push_back(Src2);
+ }
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
+ return;
+ }
+ }
+
+ unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
+ unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
+ EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ PaddedMaskNumElts);
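+    // For example, a 6-element mask over <4 x i32> sources is padded to
+    // PaddedMaskNumElts = 8, i.e. two concatenated <4 x i32> vectors per side.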
+
+ // Pad both vectors with undefs to make them the same length as the mask.
+ SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+ SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+ SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+ MOps1[0] = Src1;
+ MOps2[0] = Src2;
+
+ Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
+ Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts - PaddedMaskNumElts;
+ MappedOps[i] = Idx;
+ }
+
+ SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
+
+ // If the concatenated vector was padded, extract a subvector with the
+ // correct number of elements.
+ if (MaskNumElts != PaddedMaskNumElts)
+ Result = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ setValue(&I, Result);
+ return;
+ }
+
+ if (SrcNumElts > MaskNumElts) {
+ // Analyze the access pattern of the vector to see if we can extract
+ // two subvectors and do the shuffle.
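+    // For example, with <8 x i32> sources and the 4-element mask <4,5,6,7>,
+    // we can extract the subvector at index 4 from Src1 and shuffle it with
+    // the remapped mask <0,1,2,3>.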
+ int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
+ bool CanExtract = true;
+ for (int Idx : Mask) {
+ unsigned Input = 0;
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= (int)SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+
+ // If all the indices come from the same MaskNumElts sized portion of
+ // the sources we can use extract. Also make sure the extract wouldn't
+ // extract past the end of the source.
+ int NewStartIdx = alignDown(Idx, MaskNumElts);
+ if (NewStartIdx + MaskNumElts > SrcNumElts ||
+ (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
+ CanExtract = false;
+ // Make sure we always update StartIdx as we use it to track if all
+ // elements are undef.
+ StartIdx[Input] = NewStartIdx;
+ }
+
+ if (StartIdx[0] < 0 && StartIdx[1] < 0) {
+ setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+ return;
+ }
+ if (CanExtract) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ SDValue &Src = Input == 0 ? Src1 : Src2;
+ if (StartIdx[Input] < 0)
+ Src = DAG.getUNDEF(VT);
+ else {
+ Src = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
+ DAG.getConstant(StartIdx[Input], DL,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ }
+
+ // Calculate new mask.
+ SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
+ for (int &Idx : MappedOps) {
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ else if (Idx >= 0)
+ Idx -= StartIdx[0];
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
+ return;
+ }
+ }
+
+  // We can't use either concat vectors or extract subvectors, so fall back to
+  // replacing the shuffle with per-element extracts and a build vector.
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SmallVector<SDValue,8> Ops;
+ for (int Idx : Mask) {
+ SDValue Res;
+
+ if (Idx < 0) {
+ Res = DAG.getUNDEF(EltVT);
+ } else {
+ SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+ if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
+
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
+ }
+
+ Ops.push_back(Res);
+ }
+
+ setValue(&I, DAG.getBuildVector(VT, DL, Ops));
+}
+
+void SelectionDAGBuilder::visitInsertValue(const User &I) {
+ ArrayRef<unsigned> Indices;
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
+ Indices = IV->getIndices();
+ else
+ Indices = cast<ConstantExpr>(&I)->getIndices();
+
+ const Value *Op0 = I.getOperand(0);
+ const Value *Op1 = I.getOperand(1);
+ Type *AggTy = I.getType();
+ Type *ValTy = Op1->getType();
+ bool IntoUndef = isa<UndefValue>(Op0);
+ bool FromUndef = isa<UndefValue>(Op1);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
+
+ unsigned NumAggValues = AggValueVTs.size();
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumAggValues);
+
+ // Ignore an insertvalue that produces an empty object
+ if (!NumAggValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
+ SDValue Agg = getValue(Op0);
+ unsigned i = 0;
+ // Copy the beginning value(s) from the original aggregate.
+ for (; i != LinearIndex; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+ // Copy values from the inserted value(s).
+ if (NumValValues) {
+ SDValue Val = getValue(Op1);
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ }
+ // Copy remaining value(s) from the original aggregate.
+ for (; i != NumAggValues; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(AggValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitExtractValue(const User &I) {
+ ArrayRef<unsigned> Indices;
+ if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
+ Indices = EV->getIndices();
+ else
+ Indices = cast<ConstantExpr>(&I)->getIndices();
+
+ const Value *Op0 = I.getOperand(0);
+ Type *AggTy = Op0->getType();
+ Type *ValTy = I.getType();
+ bool OutOfUndef = isa<UndefValue>(Op0);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
+
+ unsigned NumValValues = ValValueVTs.size();
+
+  // Ignore an extractvalue that produces an empty object
+ if (!NumValValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
+ SmallVector<SDValue, 4> Values(NumValValues);
+
+ SDValue Agg = getValue(Op0);
+ // Copy out the selected value(s).
+ for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+ Values[i - LinearIndex] =
+ OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ValValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
+ Value *Op0 = I.getOperand(0);
+ // Note that the pointer operand may be a vector of pointers. Take the scalar
+ // element which holds a pointer.
+ unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
+ SDValue N = getValue(Op0);
+ SDLoc dl = getCurSDLoc();
+ auto &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
+ MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
+
+  // Normalize a vector GEP: all scalar operands should be converted to
+  // splat vectors.
+ unsigned VectorWidth = I.getType()->isVectorTy() ?
+ I.getType()->getVectorNumElements() : 0;
+
+ if (VectorWidth && !N.getValueType().isVector()) {
+ LLVMContext &Context = *DAG.getContext();
+ EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
+ N = DAG.getSplatBuildVector(VT, dl, N);
+ }
+
+ for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
+
+ // In an inbounds GEP with an offset that is nonnegative even when
+ // interpreted as signed, assume there is no unsigned overflow.
+ SDNodeFlags Flags;
+ if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
+ Flags.setNoUnsignedWrap(true);
+
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
+ DAG.getConstant(Offset, dl, N.getValueType()), Flags);
+ }
+ } else {
+ unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
+ MVT IdxTy = MVT::getIntegerVT(IdxSize);
+ APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+
+ // If this is a scalar constant or a splat vector of constants,
+ // handle it quickly.
+ const auto *C = dyn_cast<Constant>(Idx);
+ if (C && isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
+
+ if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
+ if (CI->isZero())
+ continue;
+ APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
+ LLVMContext &Context = *DAG.getContext();
+ SDValue OffsVal = VectorWidth ?
+ DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
+ DAG.getConstant(Offs, dl, IdxTy);
+
+ // In an inbounds GEP with an offset that is nonnegative even when
+ // interpreted as signed, assume there is no unsigned overflow.
+ SDNodeFlags Flags;
+ if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
+ Flags.setNoUnsignedWrap(true);
+
+ OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
+
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ SDValue IdxN = getValue(Idx);
+
+ if (!IdxN.getValueType().isVector() && VectorWidth) {
+ EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
+ IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+ }
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
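+      // For example, indexing over i32 elements (ElementSize == 4) becomes
+      // IdxN << 2.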
+ if (ElementSize != 1) {
+ if (ElementSize.isPowerOf2()) {
+ unsigned Amt = ElementSize.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, dl,
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, dl, IdxN.getValueType()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
+ IdxN.getValueType());
+ IdxN = DAG.getNode(ISD::MUL, dl,
+ N.getValueType(), IdxN, Scale);
+ }
+ }
+
+ N = DAG.getNode(ISD::ADD, dl,
+ N.getValueType(), N, IdxN);
+ }
+ }
+
+ if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
+ N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
+ // If this is a fixed sized alloca in the entry block of the function,
+ // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ SDLoc dl = getCurSDLoc();
+ Type *Ty = I.getAllocatedType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto &DL = DAG.getDataLayout();
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
+
+ SDValue AllocSize = getValue(I.getArraySize());
+
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
+ if (AllocSize.getValueType() != IntPtr)
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
+
+ AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
+ AllocSize,
+ DAG.getConstant(TySize, dl, IntPtr));
+
+  // Handle alignment. If the requested alignment is less than or equal to
+  // the stack alignment, ignore it. If the requested alignment is greater
+  // than the stack alignment, we note it in the DYNAMIC_STACKALLOC node.
+ unsigned StackAlign =
+ DAG.getSubtarget().getFrameLowering()->getStackAlignment();
+ if (Align <= StackAlign)
+ Align = 0;
+
+ // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize =
+ DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
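+  // For example, with StackAlign == 16 and AllocSize == 20:
+  // (20 + 15) & ~15 == 32.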
+
+ SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
+ SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
+}
+
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+ if (I.isAtomic())
+ return visitAtomicLoad(I);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Value *SV = I.getOperand(0);
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(SV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return visitLoadFromSwiftError(I);
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
+ if (Alloca->isSwiftError())
+ return visitLoadFromSwiftError(I);
+ }
+ }
+
+ SDValue Ptr = getValue(SV);
+
+ Type *Ty = I.getType();
+
+ bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal);
+ bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load);
+ bool isDereferenceable =
+ isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
+ unsigned Alignment = I.getAlignment();
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ SDValue Root;
+ bool ConstantMemory = false;
+ if (isVolatile || NumValues > MaxParallelChains)
+ // Serialize volatile loads with other side effects.
+ Root = getRoot();
+ else if (AA &&
+ AA->pointsToConstantMemory(MemoryLocation(
+ SV,
+ LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
+ AAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ SDLoc dl = getCurSDLoc();
+
+ if (isVolatile)
+ Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
+
+ // An aggregate load cannot wrap around the address space, so offsets to its
+ // parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
+ EVT PtrVT = Ptr.getValueType();
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // Serializing loads here may result in excessive register pressure, and
+ // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+ // could recover a bit by hoisting nodes upward in the chain by recognizing
+ // they are side-effect free or do not alias. The optimizer should really
+ // avoid this case by converting large object/array copies to llvm.memcpy
+ // (MaxParallelChains should always remain as failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue A = DAG.getNode(ISD::ADD, dl,
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], dl, PtrVT),
+ Flags);
+ auto MMOFlags = MachineMemOperand::MONone;
+ if (isVolatile)
+ MMOFlags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ MMOFlags |= MachineMemOperand::MONonTemporal;
+ if (isInvariant)
+ MMOFlags |= MachineMemOperand::MOInvariant;
+ if (isDereferenceable)
+ MMOFlags |= MachineMemOperand::MODereferenceable;
+ MMOFlags |= TLI.getMMOFlags(I);
+
+ SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
+ MachinePointerInfo(SV, Offsets[i]), Alignment,
+ MMOFlags, AAInfo, Ranges);
+ Chains[ChainI] = L.getValue(1);
+
+ if (MemVTs[i] != ValueVTs[i])
+ L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
+
+ Values[i] = L;
+ }
+
+ if (!ConstantMemory) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
+ if (isVolatile)
+ DAG.setRoot(Chain);
+ else
+ PendingLoads.push_back(Chain);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(ValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
+ assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
+ "call visitStoreToSwiftError when backend supports swifterror");
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ const Value *SrcV = I.getOperand(0);
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
+ SrcV->getType(), ValueVTs, &Offsets);
+ assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+ "expect a single EVT for swifterror");
+
+ SDValue Src = getValue(SrcV);
+ // Create a virtual register, then update the virtual register.
+ Register VReg =
+ SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
+ // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
+ // Chain can be getRoot or getControlRoot.
+ SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
+ SDValue(Src.getNode(), Src.getResNo()));
+ DAG.setRoot(CopyNode);
+}
+
+void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
+ assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
+ "call visitLoadFromSwiftError when backend supports swifterror");
+
+ assert(!I.isVolatile() &&
+ !I.hasMetadata(LLVMContext::MD_nontemporal) &&
+ !I.hasMetadata(LLVMContext::MD_invariant_load) &&
+ "Support volatile, non temporal, invariant for load_from_swift_error");
+
+ const Value *SV = I.getOperand(0);
+ Type *Ty = I.getType();
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ assert(
+ (!AA ||
+ !AA->pointsToConstantMemory(MemoryLocation(
+ SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
+ AAInfo))) &&
+ "load_from_swift_error should not be constant memory");
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
+ ValueVTs, &Offsets);
+ assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+ "expect a single EVT for swifterror");
+
+ // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
+ SDValue L = DAG.getCopyFromReg(
+ getRoot(), getCurSDLoc(),
+ SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
+
+ setValue(&I, L);
+}
+
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+ if (I.isAtomic())
+ return visitAtomicStore(I);
+
+ const Value *SrcV = I.getOperand(0);
+ const Value *PtrV = I.getOperand(1);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return visitStoreToSwiftError(I);
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
+ if (Alloca->isSwiftError())
+ return visitStoreToSwiftError(I);
+ }
+ }
+
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
+ SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ // Get the lowered operands. Note that we do this after
+ // checking if NumResults is zero, because with zero results
+ // the operands won't have values in the map.
+ SDValue Src = getValue(SrcV);
+ SDValue Ptr = getValue(PtrV);
+
+ SDValue Root = getRoot();
+ SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
+ SDLoc dl = getCurSDLoc();
+ EVT PtrVT = Ptr.getValueType();
+ unsigned Alignment = I.getAlignment();
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+
+ auto MMOFlags = MachineMemOperand::MONone;
+ if (I.isVolatile())
+ MMOFlags |= MachineMemOperand::MOVolatile;
+ if (I.hasMetadata(LLVMContext::MD_nontemporal))
+ MMOFlags |= MachineMemOperand::MONonTemporal;
+ MMOFlags |= TLI.getMMOFlags(I);
+
+  // An aggregate store cannot wrap around the address space, so offsets to
+  // its parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
+ SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
+ if (MemVTs[i] != ValueVTs[i])
+ Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
+ SDValue St =
+ DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
+ Alignment, MMOFlags, AAInfo);
+ Chains[ChainI] = St;
+ }
+
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
+ DAG.setRoot(StoreNode);
+}
+
+void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
+ bool IsCompressing) {
+ SDLoc sdl = getCurSDLoc();
+
+ auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
+ Src0 = I.getArgOperand(0);
+ Ptr = I.getArgOperand(1);
+ Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Mask = I.getArgOperand(3);
+ };
+ auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // llvm.masked.compressstore.*(Src0, Ptr, Mask)
+ Src0 = I.getArgOperand(0);
+ Ptr = I.getArgOperand(1);
+ Mask = I.getArgOperand(2);
+ Alignment = 0;
+ };
+
+ Value *PtrOperand, *MaskOperand, *Src0Operand;
+ unsigned Alignment;
+ if (IsCompressing)
+ getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+ else
+ getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+
+ SDValue Ptr = getValue(PtrOperand);
+ SDValue Src0 = getValue(Src0Operand);
+ SDValue Mask = getValue(MaskOperand);
+
+ EVT VT = Src0.getValueType();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(PtrOperand),
+ MachineMemOperand::MOStore, VT.getStoreSize(),
+ Alignment, AAInfo);
+ SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
+ MMO, false /* Truncating */,
+ IsCompressing);
+ DAG.setRoot(StoreNode);
+ setValue(&I, StoreNode);
+}
+
+// Get a uniform base for the Gather/Scatter intrinsic.
+// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
+// We try to represent it as a base pointer + vector of indices.
+// Usually, the vector of pointers comes from a 'getelementptr' instruction.
+// The first operand of the GEP may be a single pointer or a vector of pointers
+// Example:
+// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
+// or
+// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
+// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
+//
+// When the first GEP operand is a single pointer, it is the uniform base we
+// are looking for. If the first operand of the GEP is a splat vector, we
+// extract the splat value and use it as the uniform base.
+// In all other cases the function returns 'false'.
+static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
+ ISD::MemIndexType &IndexType, SDValue &Scale,
+ SelectionDAGBuilder *SDB) {
+ SelectionDAG& DAG = SDB->DAG;
+ LLVMContext &Context = *DAG.getContext();
+
+ assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return false;
+
+ const Value *GEPPtr = GEP->getPointerOperand();
+ if (!GEPPtr->getType()->isVectorTy())
+ Ptr = GEPPtr;
+ else if (!(Ptr = getSplatValue(GEPPtr)))
+ return false;
+
+ unsigned FinalIndex = GEP->getNumOperands() - 1;
+ Value *IndexVal = GEP->getOperand(FinalIndex);
+
+ // Ensure all the other indices are 0.
+ for (unsigned i = 1; i < FinalIndex; ++i) {
+ auto *C = dyn_cast<Constant>(GEP->getOperand(i));
+ if (!C)
+ return false;
+ if (isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
+ auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (!CI || !CI->isZero())
+ return false;
+ }
+
+ // The operands of the GEP may be defined in another basic block.
+ // In this case we'll not find nodes for the operands.
+ if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
+ return false;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
+ SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ Base = SDB->getValue(Ptr);
+ Index = SDB->getValue(IndexVal);
+ IndexType = ISD::SIGNED_SCALED;
+
+ if (!Index.getValueType().isVector()) {
+ unsigned GEPWidth = GEP->getType()->getVectorNumElements();
+ EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
+ Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
+ }
+ return true;
+}
+
+void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
+ const Value *Ptr = I.getArgOperand(1);
+ SDValue Src0 = getValue(I.getArgOperand(0));
+ SDValue Mask = getValue(I.getArgOperand(3));
+ EVT VT = Src0.getValueType();
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+
+ SDValue Base;
+ SDValue Index;
+ ISD::MemIndexType IndexType;
+ SDValue Scale;
+ const Value *BasePtr = Ptr;
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
+ this);
+
+ const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
+ MachineMemOperand::MOStore, VT.getStoreSize(),
+ Alignment, AAInfo);
+ if (!UniformBase) {
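+    // No uniform base was found: scatter using the pointer vector itself as
+    // the index, i.e. address[i] = 0 + Ptrs[i] * 1.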
+ Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
+ SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
+ Ops, MMO, IndexType);
+ DAG.setRoot(Scatter);
+ setValue(&I, Scatter);
+}
+
+void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
+ SDLoc sdl = getCurSDLoc();
+
+ auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
+ Ptr = I.getArgOperand(0);
+ Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Mask = I.getArgOperand(2);
+ Src0 = I.getArgOperand(3);
+ };
+ auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // @llvm.masked.expandload.*(Ptr, Mask, Src0)
+ Ptr = I.getArgOperand(0);
+ Alignment = 0;
+ Mask = I.getArgOperand(1);
+ Src0 = I.getArgOperand(2);
+ };
+
+ Value *PtrOperand, *MaskOperand, *Src0Operand;
+ unsigned Alignment;
+ if (IsExpanding)
+ getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+ else
+ getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+
+ SDValue Ptr = getValue(PtrOperand);
+ SDValue Src0 = getValue(Src0Operand);
+ SDValue Mask = getValue(MaskOperand);
+
+ EVT VT = Src0.getValueType();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ // Do not serialize masked loads of constant memory with anything.
+ bool AddToChain =
+ !AA || !AA->pointsToConstantMemory(MemoryLocation(
+ PtrOperand,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(I.getType())),
+ AAInfo));
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(PtrOperand),
+ MachineMemOperand::MOLoad, VT.getStoreSize(),
+ Alignment, AAInfo, Ranges);
+
+ SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
+ ISD::NON_EXTLOAD, IsExpanding);
+ if (AddToChain)
+ PendingLoads.push_back(Load.getValue(1));
+ setValue(&I, Load);
+}
+
+void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
+ const Value *Ptr = I.getArgOperand(0);
+ SDValue Src0 = getValue(I.getArgOperand(3));
+ SDValue Mask = getValue(I.getArgOperand(2));
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ SDValue Root = DAG.getRoot();
+ SDValue Base;
+ SDValue Index;
+ ISD::MemIndexType IndexType;
+ SDValue Scale;
+ const Value *BasePtr = Ptr;
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
+ this);
+ bool ConstantMemory = false;
+ if (UniformBase && AA &&
+ AA->pointsToConstantMemory(
+ MemoryLocation(BasePtr,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(I.getType())),
+ AAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ }
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
+ MachineMemOperand::MOLoad, VT.getStoreSize(),
+ Alignment, AAInfo, Ranges);
+
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
+ SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
+ Ops, MMO, IndexType);
+
+ SDValue OutChain = Gather.getValue(1);
+ if (!ConstantMemory)
+ PendingLoads.push_back(OutChain);
+ setValue(&I, Gather);
+}
+
+void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
+ SDLoc dl = getCurSDLoc();
+ AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
+ AtomicOrdering FailureOrdering = I.getFailureOrdering();
+ SyncScope::ID SSID = I.getSyncScopeID();
+
+ SDValue InChain = getRoot();
+
+ MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
+ SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
+
+ auto Alignment = DAG.getEVTAlignment(MemVT);
+
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+ Flags, MemVT.getStoreSize(), Alignment,
+ AAMDNodes(), nullptr, SSID, SuccessOrdering,
+ FailureOrdering);
+
+ SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
+ dl, MemVT, VTs, InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()), MMO);
+
+ SDValue OutChain = L.getValue(2);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+ SDLoc dl = getCurSDLoc();
+ ISD::NodeType NT;
+ switch (I.getOperation()) {
+ default: llvm_unreachable("Unknown atomicrmw operation");
+ case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
+ case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
+ case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
+ case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
+ case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
+ case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
+ case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
+ case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
+ case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
+ case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
+ case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+ case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
+ case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
+ }
+ AtomicOrdering Ordering = I.getOrdering();
+ SyncScope::ID SSID = I.getSyncScopeID();
+
+ SDValue InChain = getRoot();
+
+ auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
+ auto Alignment = DAG.getEVTAlignment(MemVT);
+
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+ MemVT.getStoreSize(), Alignment, AAMDNodes(),
+ nullptr, SSID, Ordering);
+
+ SDValue L =
+ DAG.getAtomic(NT, dl, MemVT, InChain,
+ getValue(I.getPointerOperand()), getValue(I.getValOperand()),
+ MMO);
+
+ SDValue OutChain = L.getValue(1);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitFence(const FenceInst &I) {
+ SDLoc dl = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
+ Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
+ DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
+}
+
+void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
+ SDLoc dl = getCurSDLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SyncScope::ID SSID = I.getSyncScopeID();
+
+ SDValue InChain = getRoot();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
+
+ if (!TLI.supportsUnalignedAtomics() &&
+ I.getAlignment() < MemVT.getSizeInBits() / 8)
+ report_fatal_error("Cannot generate unaligned atomic load");
+
+ auto Flags = MachineMemOperand::MOLoad;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ if (I.hasMetadata(LLVMContext::MD_invariant_load))
+ Flags |= MachineMemOperand::MOInvariant;
+ if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
+ DAG.getDataLayout()))
+ Flags |= MachineMemOperand::MODereferenceable;
+
+ Flags |= TLI.getMMOFlags(I);
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+ Flags, MemVT.getStoreSize(),
+ I.getAlignment() ? I.getAlignment() :
+ DAG.getEVTAlignment(MemVT),
+ AAMDNodes(), nullptr, SSID, Order);
+
+ InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
+
+ SDValue Ptr = getValue(I.getPointerOperand());
+
+ if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
+ // TODO: Once this is better exercised by tests, it should be merged with
+ // the normal path for loads to prevent future divergence.
+ SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
+ if (MemVT != VT)
+ L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+ setValue(&I, L);
+ SDValue OutChain = L.getValue(1);
+ if (!I.isUnordered())
+ DAG.setRoot(OutChain);
+ else
+ PendingLoads.push_back(OutChain);
+ return;
+ }
+
+ SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
+ Ptr, MMO);
+
+ SDValue OutChain = L.getValue(1);
+ if (MemVT != VT)
+ L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+ SDLoc dl = getCurSDLoc();
+
+ AtomicOrdering Ordering = I.getOrdering();
+ SyncScope::ID SSID = I.getSyncScopeID();
+
+ SDValue InChain = getRoot();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT MemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
+
+ if (I.getAlignment() < MemVT.getSizeInBits() / 8)
+ report_fatal_error("Cannot generate unaligned atomic store");
+
+ auto Flags = MachineMemOperand::MOStore;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= TLI.getMMOFlags(I);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+ MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(),
+ nullptr, SSID, Ordering);
+
+ SDValue Val = getValue(I.getValueOperand());
+ if (Val.getValueType() != MemVT)
+ Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+ SDValue Ptr = getValue(I.getPointerOperand());
+
+ if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
+ // TODO: Once this is better exercised by tests, it should be merged with
+ // the normal path for stores to prevent future divergence.
+ SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
+ DAG.setRoot(S);
+ return;
+ }
+ SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
+ Ptr, Val, MMO);
+
+ DAG.setRoot(OutChain);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ // Ignore the callsite's attributes. A specific call site may be marked with
+ // readnone, but the lowering code will expect the chain based on the
+ // definition.
+ const Function *F = I.getCalledFunction();
+ bool HasChain = !F->doesNotAccessMemory();
+ bool OnlyLoad = HasChain && F->onlyReadsMemory();
+
+ // Build the operand list.
+ SmallVector<SDValue, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+  // Info is set by getTgtMemIntrinsic.
+ TargetLowering::IntrinsicInfo Info;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
+ DAG.getMachineFunction(),
+ Intrinsic);
+
+  // Add the intrinsic ID as an integer operand if it's not a target memory
+  // intrinsic, or if the target memory intrinsic lowers to INTRINSIC_VOID or
+  // INTRINSIC_W_CHAIN (both of which still take the intrinsic ID operand).
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout())));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ const Value *Arg = I.getArgOperand(i);
+ if (!I.paramHasAttr(i, Attribute::ImmArg)) {
+ Ops.push_back(getValue(Arg));
+ continue;
+ }
+
+ // Use TargetConstant instead of a regular constant for immarg.
+ EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
+ assert(CI->getBitWidth() <= 64 &&
+ "large intrinsic immediates not handled");
+ Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
+ } else {
+ Ops.push_back(
+ DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
+ }
+ }
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+
+ if (HasChain)
+ ValueVTs.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ // Create the node.
+ SDValue Result;
+ if (IsTgtIntrinsic) {
+ // This is target intrinsic that touches memory
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ Result = DAG.getMemIntrinsicNode(
+ Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo);
+ } else if (!HasChain) {
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
+ } else if (!I.getType()->isVoidTy()) {
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
+ } else {
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+ }
+
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (!I.getType()->isVoidTy()) {
+ if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
+ Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
+ } else
+ Result = lowerRangeToAssertZExt(DAG, I, Result);
+
+ setValue(&I, Result);
+ }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with exponent of 1:
+///
+/// Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the i32 bit pattern of the floating-point value.
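+///
+/// For example, for Op = 6.0f (bit pattern 0x40C00000) this computes
+/// (0x40C00000 & 0x007fffff) | 0x3f800000 = 0x3FC00000 = 1.5f.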
+static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
+ SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x007fffff, dl, MVT::i32));
+ SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+ DAG.getConstant(0x3f800000, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the i32 bit pattern of the floating-point value.
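+///
+/// For example, for Op = 6.0f (bit pattern 0x40C00000) the biased exponent
+/// field is 129, so this returns (float)(129 - 127) = 2.0f.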
+static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
+ const TargetLowering &TLI, const SDLoc &dl) {
+ SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x7f800000, dl, MVT::i32));
+ SDValue t1 = DAG.getNode(
+ ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
+ SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+ DAG.getConstant(127, dl, MVT::i32));
+ return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get 32-bit floating point constant.
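+/// For example, getF32Constant(DAG, 0x3f800000, dl) materializes 1.0f.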
+static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
+ const SDLoc &dl) {
+ return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
+ MVT::f32);
+}
+
+static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+  // IntegerPartOfX = (int32_t)t0;
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = t0 - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(
+ ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
+ DAG.getDataLayout())));
+
+ SDValue TwoToFractionalPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14, dl));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234, dl));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000, dl));
+ }
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
+}
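+
+// The final integer-domain ADD works because adding N to the biased exponent
+// field of an IEEE-754 float scales it by 2^N. An equivalent host-side sketch
+// (illustrative only; hypothetical local names, ignoring exponent overflow):
+//
+//   int32_t Bits;
+//   std::memcpy(&Bits, &TwoToFrac, 4);   // TwoToFrac in [1.0, 2.0)
+//   Bits += IntPart << 23;               // multiply by 2^IntPart
+//   std::memcpy(&Result, &Bits, 4);      // Result == 2^(IntPart + Frac)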
+
+/// expandExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // t0 = Op * log2(e)
+
+ // TODO: What fast-math-flags should be set here?
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
+}
+
+/// expandLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log(2).
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent =
+ DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ SDValue LogOfMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+      // LogOfMantissa =
+ // -1.1609546f +
+ // (1.4034025f - 0.23903021f * x) * x;
+ //
+ // error 0.0034276066, which is better than 8 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbe74c456, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3fb3a2b1, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // LogOfMantissa =
+ // -1.7417939f +
+ // (2.8212026f +
+ // (-1.4699568f +
+ // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+ //
+ // error 0.000061011436, which is 14 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbd67b6d6, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ee4f4b8, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fbc278b, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40348e95, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // LogOfMantissa =
+ // -2.1072184f +
+ // (4.2372794f +
+ // (-3.7029485f +
+ // (2.2781945f +
+ // (-0.87823314f +
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+ //
+ // error 0.0000023660568, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbc91e5ac, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e4350aa, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f60d3e3, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x4011cdf0, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x406cfd1c, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x408797cb, dl));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab, dl));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
+}
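+
+// Worked example (illustrative only): since 6.0 = 1.5 * 2^2, expandLog
+// computes log(6.0f) as Exp * ln(2) + LogOfMantissa(1.5), i.e.
+// 2 * 0.693147 + 0.405465 ~= 1.791760. expandLog2 and expandLog10 below use
+// the same exponent/significand split, scaling the exponent by 1 and
+// log10(2) respectively.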
+
+/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Get the exponent.
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+    // Different possible minimax approximations of the significand in
+    // floating-point for various degrees of accuracy over [1,2].
+ SDValue Log2ofMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+ //
+ // error 0.0049451742, which is more than 7 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbeb08fe0, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x40019463, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log2ofMantissa =
+ // -2.51285454f +
+ // (4.07009056f +
+ // (-2.12067489f +
+ // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+ //
+ // error 0.0000876136000, which is better than 13 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbda7262e, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f25280b, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x4007b923, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40823e2f, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log2ofMantissa =
+ // -3.0400495f +
+ // (6.1129976f +
+ // (-5.3420409f +
+ // (3.2865683f +
+ // (-1.2669343f +
+ // (0.27515199f -
+ // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+ //
+ // error 0.0000018516, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbcd2769e, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e8ce0b9, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fa22ae7, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40525723, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x40aaf200, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x40c39dad, dl));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c, dl));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
+}
+
+/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log10(2) [0.30102999f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3e9a209a, dl));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ SDValue Log10ofMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log10ofMantissa =
+ // -0.50419619f +
+ // (0.60948995f - 0.10380950f * x) * x;
+ //
+ // error 0.0014886165, which is 6 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbdd49a13, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f1c0789, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log10ofMantissa =
+ // -0.64831180f +
+ // (0.91751397f +
+ // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+ //
+ // error 0.00019228036, which is better than 12 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3d431f31, dl));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ea21fb2, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f6ae232, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log10ofMantissa =
+ // -0.84299375f +
+ // (1.5327582f +
+ // (-1.0688956f +
+ // (0.49102474f +
+ // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+ //
+ // error 0.0000037995730, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3c5d51ce, dl));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e00685a, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3efb6798, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f88d192, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fc4316c, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70, dl));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
+}
+
+/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
+ return getLimitedPrecisionExp2(Op, dl, DAG);
+
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
+}
+
+/// expandPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode with x == 10.0f.
+static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ bool IsExp10 = false;
+ if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
+ APFloat Ten(10.0f);
+ IsExp10 = LHSC->isExactlyValue(Ten);
+ }
+ }
+
+ // TODO: What fast-math-flags should be set on the FMUL node?
+ if (IsExp10) {
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OF10 3.3219281f
+ // t0 = Op * LOG2OF10;
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
+ getF32Constant(DAG, 0x40549a78, dl));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
+}
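+
+// The 10^x case above reduces to exp2 via 10^x = 2^(x * log2(10)), with
+// log2(10) ~= 3.3219281f encoded as the bit pattern 0x40549a78. For example
+// (illustrative only), 10^2 = 2^(2 * 3.3219281) = 2^6.6438562 ~= 100.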
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG) {
+  // If RHS is a constant, we can expand this out to a multiplication tree;
+  // otherwise we end up lowering to a call to __powidf2 (for example). When
+  // optimizing for size, we only do the expansion if it would produce a
+  // small number of multiplies; otherwise we emit the libcall.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ // Get the exponent as a positive value.
+ unsigned Val = RHSC->getSExtValue();
+ if ((int)Val < 0) Val = -Val;
+
+ // powi(x, 0) -> 1.0
+ if (Val == 0)
+ return DAG.getConstantFP(1.0, DL, LHS.getValueType());
+
+ const Function &F = DAG.getMachineFunction().getFunction();
+ if (!F.hasOptSize() ||
+ // If optimizing for size, don't insert too many multiplies.
+ // This inserts up to 5 multiplies.
+ countPopulation(Val) + Log2_32(Val) < 7) {
+ // We use the simple binary decomposition method to generate the multiply
+ // sequence. There are more optimal ways to do this (for example,
+ // powi(x,15) generates one more multiply than it should), but this has
+ // the benefit of being both really simple and much better than a libcall.
+ SDValue Res; // Logically starts equal to 1.0
+ SDValue CurSquare = LHS;
+ // TODO: Intrinsics should have fast-math-flags that propagate to these
+ // nodes.
+ while (Val) {
+ if (Val & 1) {
+ if (Res.getNode())
+          Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res,
+                            CurSquare);
+ else
+ Res = CurSquare; // 1.0*CurSquare.
+ }
+
+ CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+ CurSquare, CurSquare);
+ Val >>= 1;
+ }
+
+ // If the original was negative, invert the result, producing 1/(x*x*x).
+ if (RHSC->getSExtValue() < 0)
+ Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+ DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
+ return Res;
+ }
+ }
+
+ // Otherwise, expand to a libcall.
+ return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
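+
+// Worked example of the binary decomposition above (illustrative only): for
+// powi(x, 13), Val = 13 = 0b1101, so the loop produces
+//
+//   Res = x            (bit 0 set)
+//   Res = x * x^4      (bit 2 set) == x^5
+//   Res = x^5 * x^8    (bit 3 set) == x^13
+//
+// using three live squarings (x^2, x^4, x^8; the final x^16 is dead) plus
+// two merging multiplies. The heuristic countPopulation(13) + Log2_32(13)
+// = 3 + 3 = 6 < 7 allows this expansion even when optimizing for size.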
+
+// getUnderlyingArgRegs - Find underlying registers used for a truncated,
+// bitcasted, or split argument. Returns a list of <Register, size in bits>
+// pairs.
+static void
+getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
+ const SDValue &N) {
+ switch (N.getOpcode()) {
+ case ISD::CopyFromReg: {
+ SDValue Op = N.getOperand(1);
+ Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
+ Op.getValueType().getSizeInBits());
+ return;
+ }
+ case ISD::BITCAST:
+ case ISD::AssertZext:
+ case ISD::AssertSext:
+ case ISD::TRUNCATE:
+ getUnderlyingArgRegs(Regs, N.getOperand(0));
+ return;
+ case ISD::BUILD_PAIR:
+ case ISD::BUILD_VECTOR:
+ case ISD::CONCAT_VECTORS:
+ for (SDValue Op : N->op_values())
+ getUnderlyingArgRegs(Regs, Op);
+ return;
+ default:
+ return;
+ }
+}
+
+/// If the DbgValueInst is a dbg_value of a function argument, create the
+/// corresponding DBG_VALUE machine instruction for it now. At the end of
+/// instruction selection, they will be inserted to the entry BB.
+bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
+ const Value *V, DILocalVariable *Variable, DIExpression *Expr,
+ DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
+ const Argument *Arg = dyn_cast<Argument>(V);
+ if (!Arg)
+ return false;
+
+ if (!IsDbgDeclare) {
+ // ArgDbgValues are hoisted to the beginning of the entry block. So we
+ // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
+ // the entry block.
+ bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
+ if (!IsInEntryBlock)
+ return false;
+
+    // ArgDbgValues are hoisted to the beginning of the entry block, so we
+    // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
+    // variable that is also a parameter of the current function.
+    //
+    // Although, if we are at the top of the entry block already, we can still
+    // emit using ArgDbgValue. This might catch some situations when the
+    // dbg.value refers to an argument that isn't used in the entry block, so
+    // any CopyToReg node would be optimized out and the only way to express
+    // this DBG_VALUE is by using the physical reg (or FI) as done in this
+    // method.
+ bool VariableIsFunctionInputArg = Variable->isParameter() &&
+ !DL->getInlinedAt();
+ bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
+ if (!IsInPrologue && !VariableIsFunctionInputArg)
+ return false;
+
+    // Here we assume that a function argument on the IR level can only be
+    // used to describe one input parameter on the source level. If we for
+    // example have source code like this
+ //
+    // struct A { int x, y; };
+    // void foo(struct A a, int b) {
+ // ...
+ // b = a.x;
+ // ...
+ // }
+ //
+ // and IR like this
+ //
+ // define void @foo(i32 %a1, i32 %a2, i32 %b) {
+ // entry:
+ // call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
+ // call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
+ // call void @llvm.dbg.value(metadata i32 %b, "b",
+ // ...
+ // call void @llvm.dbg.value(metadata i32 %a1, "b"
+ // ...
+ //
+    // then the last dbg.value is describing a parameter "b" using a value
+    // that is an argument. But since we have already used %a1 to describe a
+    // parameter we should not handle that last dbg.value here (that would
+    // result in an incorrect hoisting of the DBG_VALUE to the function
+    // entry).
+    // Notice that we allow one dbg.value per IR-level argument, to
+    // accommodate the situation with fragments above.
+ if (VariableIsFunctionInputArg) {
+ unsigned ArgNo = Arg->getArgNo();
+ if (ArgNo >= FuncInfo.DescribedArgs.size())
+ FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
+ else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
+ return false;
+ FuncInfo.DescribedArgs.set(ArgNo);
+ }
+ }
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+
+ bool IsIndirect = false;
+ Optional<MachineOperand> Op;
+ // Some arguments' frame index is recorded during argument lowering.
+ int FI = FuncInfo.getArgumentFrameIndex(Arg);
+ if (FI != std::numeric_limits<int>::max())
+ Op = MachineOperand::CreateFI(FI);
+
+ SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes;
+ if (!Op && N.getNode()) {
+ getUnderlyingArgRegs(ArgRegsAndSizes, N);
+ Register Reg;
+ if (ArgRegsAndSizes.size() == 1)
+ Reg = ArgRegsAndSizes.front().first;
+
+ if (Reg && Reg.isVirtual()) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ Register PR = RegInfo.getLiveInPhysReg(Reg);
+ if (PR)
+ Reg = PR;
+ }
+ if (Reg) {
+ Op = MachineOperand::CreateReg(Reg, false);
+ IsIndirect = IsDbgDeclare;
+ }
+ }
+
+ if (!Op && N.getNode()) {
+ // Check if frame index is available.
+ SDValue LCandidate = peekThroughBitcasts(N);
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ Op = MachineOperand::CreateFI(FINode->getIndex());
+ }
+
+ if (!Op) {
+ // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
+    auto splitMultiRegDbgValue =
+        [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) {
+ unsigned Offset = 0;
+ for (auto RegAndSize : SplitRegs) {
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, RegAndSize.second);
+ if (!FragmentExpr)
+ continue;
+ assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?");
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+ RegAndSize.first, Variable, *FragmentExpr));
+ Offset += RegAndSize.second;
+ }
+ };
+
+ // Check if ValueMap has reg number.
+ DenseMap<const Value *, unsigned>::const_iterator
+ VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
+ V->getType(), getABIRegCopyCC(V));
+ if (RFV.occupiesMultipleRegs()) {
+ splitMultiRegDbgValue(RFV.getRegsAndSizes());
+ return true;
+ }
+
+ Op = MachineOperand::CreateReg(VMI->second, false);
+ IsIndirect = IsDbgDeclare;
+ } else if (ArgRegsAndSizes.size() > 1) {
+ // This was split due to the calling convention, and no virtual register
+ // mapping exists for the value.
+ splitMultiRegDbgValue(ArgRegsAndSizes);
+ return true;
+ }
+ }
+
+ if (!Op)
+ return false;
+
+ assert(Variable->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ IsIndirect = (Op->isReg()) ? IsIndirect : true;
+ if (IsIndirect)
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+ *Op, Variable, Expr));
+
+ return true;
+}
+
+/// Return the appropriate SDDbgValue based on N.
+SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
+ DILocalVariable *Variable,
+ DIExpression *Expr,
+ const DebugLoc &dl,
+ unsigned DbgSDNodeOrder) {
+ if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
+ // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
+ // stack slot locations.
+ //
+ // Consider "int x = 0; int *px = &x;". There are two kinds of interesting
+ // debug values here after optimization:
+ //
+ // dbg.value(i32* %px, !"int *px", !DIExpression()), and
+ // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
+ //
+ // Both describe the direct values of their associated variables.
+ return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
+ /*IsIndirect*/ false, dl, DbgSDNodeOrder);
+ }
+ return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
+ /*IsIndirect*/ false, dl, DbgSDNodeOrder);
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ case Intrinsic::smul_fix:
+ return ISD::SMULFIX;
+ case Intrinsic::umul_fix:
+ return ISD::UMULFIX;
+ default:
+ llvm_unreachable("Unhandled fixed point intrinsic");
+ }
+}
+
+void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
+ const char *FunctionName) {
+ assert(FunctionName && "FunctionName must not be nullptr");
+ SDValue Callee = DAG.getExternalSymbol(
+ FunctionName,
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+/// Lower the call to the specified intrinsic function.
+void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
+ unsigned Intrinsic) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc sdl = getCurSDLoc();
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Res;
+
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ case Intrinsic::vastart: visitVAStart(I); return;
+ case Intrinsic::vaend: visitVAEnd(I); return;
+ case Intrinsic::vacopy: visitVACopy(I); return;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::addressofreturnaddress:
+ setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ return;
+ case Intrinsic::sponentry:
+ setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
+ TLI.getFrameIndexTy(DAG.getDataLayout())));
+ return;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
+ TLI.getFrameIndexTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::read_register: {
+ Value *Reg = I.getArgOperand(0);
+ SDValue Chain = getRoot();
+ SDValue RegName =
+ DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ Res = DAG.getNode(ISD::READ_REGISTER, sdl,
+ DAG.getVTList(VT, MVT::Other), Chain, RegName);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
+ case Intrinsic::write_register: {
+ Value *Reg = I.getArgOperand(0);
+ Value *RegValue = I.getArgOperand(1);
+ SDValue Chain = getRoot();
+ SDValue RegName =
+ DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
+ DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
+ RegName, getValue(RegValue)));
+ return;
+ }
+ case Intrinsic::setjmp:
+ lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]);
+ return;
+ case Intrinsic::longjmp:
+ lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]);
+ return;
+ case Intrinsic::memcpy: {
+ const auto &MCI = cast<MemCpyInst>(I);
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ // @llvm.memcpy defines 0 and 1 to both mean no alignment.
+ unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1);
+ unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1);
+ unsigned Align = MinAlign(DstAlign, SrcAlign);
+ bool isVol = MCI.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memcpy DAG
+ // node.
+ SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ false, isTC,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::memset: {
+ const auto &MSI = cast<MemSetInst>(I);
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ // @llvm.memset defines 0 and 1 to both mean no alignment.
+ unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1);
+ bool isVol = MSI.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ isTC, MachinePointerInfo(I.getArgOperand(0)));
+ updateDAGForMaybeTailCall(MS);
+ return;
+ }
+ case Intrinsic::memmove: {
+ const auto &MMI = cast<MemMoveInst>(I);
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ // @llvm.memmove defines 0 and 1 to both mean no alignment.
+ unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1);
+ unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1);
+ unsigned Align = MinAlign(DstAlign, SrcAlign);
+ bool isVol = MMI.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memmove DAG
+ // node.
+ SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ isTC, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MM);
+ return;
+ }
+ case Intrinsic::memcpy_element_unordered_atomic: {
+ const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Src = getValue(MI.getRawSource());
+ SDValue Length = getValue(MI.getLength());
+
+ unsigned DstAlign = MI.getDestAlignment();
+ unsigned SrcAlign = MI.getSourceAlignment();
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
+ SrcAlign, Length, LengthTy, ElemSz, isTC,
+ MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::memmove_element_unordered_atomic: {
+ auto &MI = cast<AtomicMemMoveInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Src = getValue(MI.getRawSource());
+ SDValue Length = getValue(MI.getLength());
+
+ unsigned DstAlign = MI.getDestAlignment();
+ unsigned SrcAlign = MI.getSourceAlignment();
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
+ SrcAlign, Length, LengthTy, ElemSz, isTC,
+ MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::memset_element_unordered_atomic: {
+ auto &MI = cast<AtomicMemSetInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Val = getValue(MI.getValue());
+ SDValue Length = getValue(MI.getLength());
+
+ unsigned DstAlign = MI.getDestAlignment();
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
+ LengthTy, ElemSz, isTC,
+ MachinePointerInfo(MI.getRawDest()));
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::dbg_addr:
+ case Intrinsic::dbg_declare: {
+ const auto &DI = cast<DbgVariableIntrinsic>(I);
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expression = DI.getExpression();
+ dropDanglingDebugInfo(Variable, Expression);
+ assert(Variable && "Missing variable");
+
+ // Check if address has undef value.
+ const Value *Address = DI.getVariableLocation();
+ if (!Address || isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return;
+ }
+
+ bool isParameter = Variable->isParameter() || isa<Argument>(Address);
+
+ // Check if this variable can be described by a frame index, typically
+ // either as a static alloca or a byval parameter.
+ int FI = std::numeric_limits<int>::max();
+ if (const auto *AI =
+ dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
+ if (AI->isStaticAlloca()) {
+ auto I = FuncInfo.StaticAllocaMap.find(AI);
+ if (I != FuncInfo.StaticAllocaMap.end())
+ FI = I->second;
+ }
+ } else if (const auto *Arg = dyn_cast<Argument>(
+ Address->stripInBoundsConstantOffsets())) {
+ FI = FuncInfo.getArgumentFrameIndex(Arg);
+ }
+
+ // llvm.dbg.addr is control dependent and always generates indirect
+ // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
+ // the MachineFunction variable table.
+ if (FI != std::numeric_limits<int>::max()) {
+ if (Intrinsic == Intrinsic::dbg_addr) {
+ SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
+ Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
+ }
+ return;
+ }
+
+ SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
+ SDDbgValue *SDV;
+ if (N.getNode()) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (isParameter && FINode) {
+ // Byval parameter. We have a frame index at this point.
+ SDV =
+ DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
+ /*IsIndirect*/ true, dl, SDNodeOrder);
+ } else if (isa<Argument>(Address)) {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
+ return;
+ } else {
+ SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
+ true, dl, SDNodeOrder);
+ }
+ DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+ } else {
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
+ N)) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ }
+ }
+ return;
+ }
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst &DI = cast<DbgLabelInst>(I);
+ DILabel *Label = DI.getLabel();
+ assert(Label && "Missing label");
+
+ SDDbgLabel *SDV;
+ SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
+ DAG.AddDbgLabel(SDV);
+ return;
+ }
+ case Intrinsic::dbg_value: {
+ const DbgValueInst &DI = cast<DbgValueInst>(I);
+ assert(DI.getVariable() && "Missing variable");
+
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expression = DI.getExpression();
+ dropDanglingDebugInfo(Variable, Expression);
+ const Value *V = DI.getValue();
+ if (!V)
+ return;
+
+ if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(),
+ SDNodeOrder))
+ return;
+
+ // TODO: Dangling debug info will eventually either be resolved or produce
+ // an Undef DBG_VALUE. However in the resolution case, a gap may appear
+ // between the original dbg.value location and its resolved DBG_VALUE, which
+ // we should ideally fill with an extra Undef DBG_VALUE.
+
+ DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
+ return;
+ }
+
+ case Intrinsic::eh_typeid_for: {
+ // Find the type id for the given typeinfo.
+ GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
+ unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
+ Res = DAG.getConstant(TypeID, sdl, MVT::i32);
+ setValue(&I, Res);
+ return;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ DAG.getMachineFunction().setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return;
+ case Intrinsic::eh_unwind_init:
+ DAG.getMachineFunction().setCallsUnwindInit(true);
+ return;
+ case Intrinsic::eh_dwarf_cfa:
+ setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::eh_sjlj_callsite: {
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
+ assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+ assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+ MMI.setCurrentCallSite(CI->getZExtValue());
+ return;
+ }
+ case Intrinsic::eh_sjlj_functioncontext: {
+ // Get and store the index of the function context.
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ AllocaInst *FnCtx =
+ cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+ int FI = FuncInfo.StaticAllocaMap[FnCtx];
+ MFI.setFunctionContextIndex(FI);
+ return;
+ }
+ case Intrinsic::eh_sjlj_setjmp: {
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
+ setValue(&I, Op.getValue(0));
+ DAG.setRoot(Op.getValue(1));
+ return;
+ }
+ case Intrinsic::eh_sjlj_longjmp:
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::eh_sjlj_setup_dispatch:
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
+ getRoot()));
+ return;
+ case Intrinsic::masked_gather:
+ visitMaskedGather(I);
+ return;
+ case Intrinsic::masked_load:
+ visitMaskedLoad(I);
+ return;
+ case Intrinsic::masked_scatter:
+ visitMaskedScatter(I);
+ return;
+ case Intrinsic::masked_store:
+ visitMaskedStore(I);
+ return;
+ case Intrinsic::masked_expandload:
+ visitMaskedLoad(I, true /* IsExpanding */);
+ return;
+ case Intrinsic::masked_compressstore:
+ visitMaskedStore(I, true /* IsCompressing */);
+ return;
+ case Intrinsic::powi:
+ setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG));
+ return;
+ case Intrinsic::log:
+ setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return;
+ case Intrinsic::log2:
+ setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return;
+ case Intrinsic::log10:
+ setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return;
+ case Intrinsic::exp:
+ setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return;
+ case Intrinsic::exp2:
+ setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return;
+ case Intrinsic::pow:
+ setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG, TLI));
+ return;
+ case Intrinsic::sqrt:
+ case Intrinsic::fabs:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ case Intrinsic::canonicalize: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::fabs: Opcode = ISD::FABS; break;
+ case Intrinsic::sin: Opcode = ISD::FSIN; break;
+ case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
+ }
+
+ setValue(&I, DAG.getNode(Opcode, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return;
+ }
+ case Intrinsic::lround:
+ case Intrinsic::llround:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::lround: Opcode = ISD::LROUND; break;
+ case Intrinsic::llround: Opcode = ISD::LLROUND; break;
+ case Intrinsic::lrint: Opcode = ISD::LRINT; break;
+ case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
+ }
+
+ EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
+ getValue(I.getArgOperand(0))));
+ return;
+ }
+ case Intrinsic::minnum:
+ setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return;
+ case Intrinsic::maxnum:
+ setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return;
+ case Intrinsic::minimum:
+ setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return;
+ case Intrinsic::maximum:
+ setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return;
+ case Intrinsic::copysign:
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return;
+ case Intrinsic::fma:
+ setValue(&I, DAG.getNode(ISD::FMA, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ return;
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui:
+ case Intrinsic::experimental_constrained_fptrunc:
+ case Intrinsic::experimental_constrained_fpext:
+ case Intrinsic::experimental_constrained_sqrt:
+ case Intrinsic::experimental_constrained_pow:
+ case Intrinsic::experimental_constrained_powi:
+ case Intrinsic::experimental_constrained_sin:
+ case Intrinsic::experimental_constrained_cos:
+ case Intrinsic::experimental_constrained_exp:
+ case Intrinsic::experimental_constrained_exp2:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_lrint:
+ case Intrinsic::experimental_constrained_llrint:
+ case Intrinsic::experimental_constrained_rint:
+ case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_lround:
+ case Intrinsic::experimental_constrained_llround:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
+ visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
+ return;
+ case Intrinsic::fmuladd: {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT)) {
+ setValue(&I, DAG.getNode(ISD::FMA, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ } else {
+ // TODO: Intrinsic calls should have fast-math-flags.
+ SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)));
+ SDValue Add = DAG.getNode(ISD::FADD, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ Mul,
+ getValue(I.getArgOperand(2)));
+ setValue(&I, Add);
+ }
+ return;
+ }
+ case Intrinsic::convert_to_fp16:
+ setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
+ DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)),
+ DAG.getTargetConstant(0, sdl,
+ MVT::i32))));
+ return;
+ case Intrinsic::convert_from_fp16:
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)))));
+ return;
+ case Intrinsic::pcmarker: {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
+ return;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
+ DAG.getVTList(MVT::i64, MVT::Other), Op);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
+ case Intrinsic::bitreverse:
+ setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::cttz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ sdl, Ty, Arg));
+ return;
+ }
+ case Intrinsic::ctlz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ sdl, Ty, Arg));
+ return;
+ }
+ case Intrinsic::ctpop: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
+ return;
+ }
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ bool IsFSHL = Intrinsic == Intrinsic::fshl;
+ SDValue X = getValue(I.getArgOperand(0));
+ SDValue Y = getValue(I.getArgOperand(1));
+ SDValue Z = getValue(I.getArgOperand(2));
+ EVT VT = X.getValueType();
+ SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
+ SDValue Zero = DAG.getConstant(0, sdl, VT);
+ SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
+
+ auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
+ if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
+ setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
+ return;
+ }
+
+    // When X == Y, this is a rotate. If the data type has a power-of-2 size,
+    // we avoid the select that is necessary in the general case to filter
+    // out the 0-shift possibility that leads to UB.
+ if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) {
+ auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ return;
+ }
+
+ // Some targets only rotate one way. Try the opposite direction.
+ RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL;
+ if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
+ // Negate the shift amount because it is safe to ignore the high bits.
+ SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
+ return;
+ }
+
+ // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
+ // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW))
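+      // E.g. (illustrative only) on i8, rotl(X, 3) == rotr(X, 5) because
+      // (0 - 3) % 8 == 5 in the unsigned arithmetic (UREM) used here.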
+ SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
+ SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+ SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
+ setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
+ return;
+ }
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
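+    // Worked example (illustrative only): on i8, fshl(0xAB, 0xCD, 4) shifts
+    // the 16-bit concatenation 0xABCD left by 4 and keeps the high byte:
+    // (0xAB << 4) | (0xCD >> (8 - 4)) = 0xB0 | 0x0C = 0xBC.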
+ SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt);
+ SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+ SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
+
+ // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
+ // and that is undefined. We must compare and select to avoid UB.
+ EVT CCVT = MVT::i1;
+ if (VT.isVector())
+ CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
+
+ // For fshl, 0-shift returns the 1st arg (X).
+ // For fshr, 0-shift returns the 2nd arg (Y).
+ SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
+ setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
+ return;
+ }
+ case Intrinsic::sadd_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::uadd_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::ssub_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::usub_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::smul_fix:
+ case Intrinsic::umul_fix: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
+ Op1.getValueType(), Op1, Op2, Op3));
+ return;
+ }
+ case Intrinsic::smul_fix_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
+ Op3));
+ return;
+ }
+ case Intrinsic::umul_fix_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
+ Op3));
+ return;
+ }
+ case Intrinsic::stacksave: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(
+ ISD::STACKSAVE, sdl,
+ DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
+ case Intrinsic::stackrestore:
+ Res = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
+ return;
+ case Intrinsic::get_dynamic_area_offset: {
+ SDValue Op = getRoot();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
+    // The result type of @llvm.get.dynamic.area.offset should match PtrTy
+    // for the target.
+ if (PtrTy.getSizeInBits() < ResTy.getSizeInBits())
+ report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
+ " intrinsic!");
+ Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
+ Op);
+ DAG.setRoot(Op);
+ setValue(&I, Res);
+ return;
+ }
+ case Intrinsic::stackguard: {
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Module &M = *MF.getFunction().getParent();
+ SDValue Chain = getRoot();
+ if (TLI.useLoadStackGuardNode()) {
+ Res = getLoadStackGuard(DAG, sdl, Chain);
+ } else {
+ const Value *Global = TLI.getSDagStackGuard(M);
+ unsigned Align = DL->getPrefTypeAlignment(Global->getType());
+ Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
+ MachinePointerInfo(Global, 0), Align,
+ MachineMemOperand::MOVolatile);
+ }
+ if (TLI.useStackGuardXorFP())
+ Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
+ DAG.setRoot(Chain);
+ setValue(&I, Res);
+ return;
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code into the DAG to store the stack guard onto the stack.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ SDValue Src, Chain = getRoot();
+
+ if (TLI.useLoadStackGuardNode())
+ Src = getLoadStackGuard(DAG, sdl, Chain);
+ else
+ Src = getValue(I.getArgOperand(0)); // The guard's value.
+
+ AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MFI.setStackProtectorIndex(FI);
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+ // Store the stack protector onto the stack.
+ Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI),
+ /* Alignment = */ 0, MachineMemOperand::MOVolatile);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return;
+ }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ // Drop the intrinsic, but forward the value
+ setValue(&I, getValue(I.getOperand(0)));
+ return;
+ case Intrinsic::assume:
+ case Intrinsic::var_annotation:
+ case Intrinsic::sideeffect:
+ // Discard annotate attributes, assumptions, and artificial side-effects.
+ return;
+
+ case Intrinsic::codeview_annotation: {
+ // Emit a label associated with this metadata.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MCSymbol *Label =
+ MF.getMMI().getContext().createTempSymbol("annotation", true);
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
+ MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
+ Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
+ DAG.setRoot(Res);
+ return;
+ }
+
+ case Intrinsic::init_trampoline: {
+ const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
+
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
+ Ops[5] = DAG.getSrcValue(F);
+
+ Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
+
+ DAG.setRoot(Res);
+ return;
+ }
+ case Intrinsic::adjust_trampoline:
+ setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
+ return;
+ case Intrinsic::gcroot: {
+ assert(DAG.getMachineFunction().getFunction().hasGC() &&
+ "only valid in functions with gc specified, enforced by Verifier");
+ assert(GFI && "implied by previous");
+ const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ return;
+ }
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+ case Intrinsic::flt_rounds:
+ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
+ return;
+
+ case Intrinsic::expect:
+ // Just replace __builtin_expect(exp, c) with EXP.
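+    // For example, %v = call i32 @llvm.expect.i32(i32 %x, i32 1) lowers to
+    // plain %x; any branch-probability hint it carried is consumed earlier in
+    // the pipeline (or simply dropped), so the intrinsic is just forwarded.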
+ setValue(&I, getValue(I.getArgOperand(0)));
+ return;
+
+ case Intrinsic::debugtrap:
+ case Intrinsic::trap: {
+ StringRef TrapFuncName =
+ I.getAttributes()
+ .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
+ .getValueAsString();
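+    // If a "trap-func-name" attribute is present, lower to a call of the
+    // named function instead of emitting a TRAP/DEBUGTRAP node.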
+ if (TrapFuncName.empty()) {
+      ISD::NodeType Op =
+          (Intrinsic == Intrinsic::trap) ? ISD::TRAP : ISD::DEBUGTRAP;
+      DAG.setRoot(DAG.getNode(Op, sdl, MVT::Other, getRoot()));
+ return;
+ }
+ TargetLowering::ArgListTy Args;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+ CallingConv::C, I.getType(),
+ DAG.getExternalSymbol(TrapFuncName.data(),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+ DAG.setRoot(Result.second);
+ return;
+ }
+
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow: {
+ ISD::NodeType Op;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+ case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+ case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+ case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+ case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+ }
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+
+ EVT ResultVT = Op1.getValueType();
+ EVT OverflowVT = MVT::i1;
+ if (ResultVT.isVector())
+ OverflowVT = EVT::getVectorVT(
+ *Context, OverflowVT, ResultVT.getVectorNumElements());
+
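+    // Each ISD::*O node produces two results: the arithmetic value and an
+    // overflow flag (i1, or a vector of i1 for vector operands).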
+ SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
+ setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
+ return;
+ }
+ case Intrinsic::prefetch: {
+ SDValue Ops[5];
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+    auto Flags = rw == 0 ? MachineMemOperand::MOLoad
+                         : MachineMemOperand::MOStore;
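+    // Operand order mirrors @llvm.prefetch: address, rw (0 = read,
+    // 1 = write), locality (0-3), cache type (0 = instruction, 1 = data).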
+ Ops[0] = DAG.getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = getValue(I.getArgOperand(3));
+ SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
+ DAG.getVTList(MVT::Other), Ops,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ Flags);
+
+    // Chain the prefetch in parallel with any pending loads, to stay out of
+    // the way of later optimizations.
+ PendingLoads.push_back(Result);
+ Result = getRoot();
+ DAG.setRoot(Result);
+ return;
+ }
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end: {
+ bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
+ // Stack coloring is not enabled in O0, discard region information.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ const int64_t ObjectSize =
+ cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
+ Value *const ObjectPtr = I.getArgOperand(1);
+ SmallVector<const Value *, 4> Allocas;
+ GetUnderlyingObjects(ObjectPtr, Allocas, *DL);
+
+    for (const Value *Object : Allocas) {
+      const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Object);
+
+ // Could not find an Alloca.
+ if (!LifetimeObject)
+ continue;
+
+ // First check that the Alloca is static, otherwise it won't have a
+ // valid frame index.
+ auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return;
+
+ const int FrameIndex = SI->second;
+ int64_t Offset;
+ if (GetPointerBaseWithConstantOffset(
+ ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
+ Offset = -1; // Cannot determine offset from alloca to lifetime object.
+ Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
+ Offset);
+ DAG.setRoot(Res);
+ }
+ return;
+ }
+ case Intrinsic::invariant_start:
+ // Discard region information.
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
+ return;
+ case Intrinsic::invariant_end:
+ // Discard region information.
+ return;
+ case Intrinsic::clear_cache:
+    // FunctionName may be null.
+ if (const char *FunctionName = TLI.getClearCacheBuiltinName())
+ lowerCallToExternalSymbol(I, FunctionName);
+ return;
+ case Intrinsic::donothing:
+ // ignore
+ return;
+ case Intrinsic::experimental_stackmap:
+ visitStackmap(I);
+ return;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ visitPatchpoint(&I);
+ return;
+ case Intrinsic::experimental_gc_statepoint:
+ LowerStatepoint(ImmutableStatepoint(&I));
+ return;
+ case Intrinsic::experimental_gc_result:
+ visitGCResult(cast<GCResultInst>(I));
+ return;
+ case Intrinsic::experimental_gc_relocate:
+ visitGCRelocate(cast<GCRelocateInst>(I));
+ return;
+ case Intrinsic::instrprof_increment:
+ llvm_unreachable("instrprof failed to lower an increment");
+ case Intrinsic::instrprof_value_profile:
+ llvm_unreachable("instrprof failed to lower a value profiling call");
+ case Intrinsic::localescape: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+
+ // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
+ // is the same on all targets.
+ for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
+ Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Skip null pointers. They represent a hole in index space.
+ AllocaInst *Slot = cast<AllocaInst>(Arg);
+ assert(FuncInfo.StaticAllocaMap.count(Slot) &&
+ "can only escape static allocas");
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
+ TII->get(TargetOpcode::LOCAL_ESCAPE))
+ .addSym(FrameAllocSym)
+ .addFrameIndex(FI);
+ }
+
+ return;
+ }
+
+ case Intrinsic::localrecover: {
+ // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0);
+
+ // Get the symbol that defines the frame offset.
+ auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
+ unsigned IdxVal =
+ unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
+
+ // Create a MCSymbol for the label to avoid any target lowering
+ // that would make this PC relative.
+ SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
+ SDValue OffsetVal =
+ DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
+
+ // Add the offset to the FP.
+ Value *FP = I.getArgOperand(1);
+ SDValue FPVal = getValue(FP);
+ SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
+ setValue(&I, Add);
+
+ return;
+ }
+
+ case Intrinsic::eh_exceptionpointer:
+ case Intrinsic::eh_exceptioncode: {
+ // Get the exception pointer vreg, copy from it, and resize it to fit.
+ const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
+ unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ SDValue N =
+ DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
+ if (Intrinsic == Intrinsic::eh_exceptioncode)
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
+ setValue(&I, N);
+ return;
+ }
+ case Intrinsic::xray_customevent: {
+ // Here we want to make sure that the intrinsic behaves as if it has a
+ // specific calling convention, and only for x86_64.
+ // FIXME: Support other platforms later.
+ const auto &Triple = DAG.getTarget().getTargetTriple();
+ if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ return;
+
+ SDLoc DL = getCurSDLoc();
+ SmallVector<SDValue, 8> Ops;
+
+ // We want to say that we always want the arguments in registers.
+ SDValue LogEntryVal = getValue(I.getArgOperand(0));
+ SDValue StrSizeVal = getValue(I.getArgOperand(1));
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Chain = getRoot();
+ Ops.push_back(LogEntryVal);
+ Ops.push_back(StrSizeVal);
+ Ops.push_back(Chain);
+
+ // We need to enforce the calling convention for the callsite, so that
+ // argument ordering is enforced correctly, and that register allocation can
+ // see that some registers may be assumed clobbered and have to preserve
+ // them across calls to the intrinsic.
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
+ DL, NodeTys, Ops);
+ SDValue patchableNode = SDValue(MN, 0);
+ DAG.setRoot(patchableNode);
+ setValue(&I, patchableNode);
+ return;
+ }
+ case Intrinsic::xray_typedevent: {
+ // Here we want to make sure that the intrinsic behaves as if it has a
+ // specific calling convention, and only for x86_64.
+ // FIXME: Support other platforms later.
+ const auto &Triple = DAG.getTarget().getTargetTriple();
+ if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ return;
+
+ SDLoc DL = getCurSDLoc();
+ SmallVector<SDValue, 8> Ops;
+
+ // We want to say that we always want the arguments in registers.
+ // It's unclear to me how manipulating the selection DAG here forces callers
+ // to provide arguments in registers instead of on the stack.
+ SDValue LogTypeId = getValue(I.getArgOperand(0));
+ SDValue LogEntryVal = getValue(I.getArgOperand(1));
+ SDValue StrSizeVal = getValue(I.getArgOperand(2));
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Chain = getRoot();
+ Ops.push_back(LogTypeId);
+ Ops.push_back(LogEntryVal);
+ Ops.push_back(StrSizeVal);
+ Ops.push_back(Chain);
+
+ // We need to enforce the calling convention for the callsite, so that
+ // argument ordering is enforced correctly, and that register allocation can
+ // see that some registers may be assumed clobbered and have to preserve
+ // them across calls to the intrinsic.
+ MachineSDNode *MN = DAG.getMachineNode(
+ TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
+ SDValue patchableNode = SDValue(MN, 0);
+ DAG.setRoot(patchableNode);
+ setValue(&I, patchableNode);
+ return;
+ }
+ case Intrinsic::experimental_deoptimize:
+ LowerDeoptimizeCall(&I);
+ return;
+
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ visitVectorReduce(I, Intrinsic);
+ return;
+
+ case Intrinsic::icall_branch_funnel: {
+ SmallVector<SDValue, 16> Ops;
+ Ops.push_back(getValue(I.getArgOperand(0)));
+
+ int64_t Offset;
+ auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+ I.getArgOperand(1), Offset, DAG.getDataLayout()));
+ if (!Base)
+ report_fatal_error(
+ "llvm.icall.branch.funnel operand must be a GlobalValue");
+ Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
+
+ struct BranchFunnelTarget {
+ int64_t Offset;
+ SDValue Target;
+ };
+ SmallVector<BranchFunnelTarget, 8> Targets;
+
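+    // The remaining arguments come in (pointer, function) pairs; every
+    // pointer must be the same GlobalValue plus a constant offset.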
+ for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
+ auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+ I.getArgOperand(Op), Offset, DAG.getDataLayout()));
+ if (ElemBase != Base)
+ report_fatal_error("all llvm.icall.branch.funnel operands must refer "
+ "to the same GlobalValue");
+
+ SDValue Val = getValue(I.getArgOperand(Op + 1));
+ auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
+ if (!GA)
+ report_fatal_error(
+ "llvm.icall.branch.funnel operand must be a GlobalValue");
+ Targets.push_back({Offset, DAG.getTargetGlobalAddress(
+ GA->getGlobal(), getCurSDLoc(),
+ Val.getValueType(), GA->getOffset())});
+ }
+ llvm::sort(Targets,
+ [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
+ return T1.Offset < T2.Offset;
+ });
+
+ for (auto &T : Targets) {
+ Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
+ Ops.push_back(T.Target);
+ }
+
+ Ops.push_back(DAG.getRoot()); // Chain
+ SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
+ getCurSDLoc(), MVT::Other, Ops),
+ 0);
+ DAG.setRoot(N);
+ setValue(&I, N);
+ HasTailCall = true;
+ return;
+ }
+
+ case Intrinsic::wasm_landingpad_index:
+ // Information this intrinsic contained has been transferred to
+ // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
+ // delete it now.
+ return;
+
+ case Intrinsic::aarch64_settag:
+ case Intrinsic::aarch64_settag_zero: {
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
+ SDValue Val = TSI.EmitTargetCodeForSetTag(
+ DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
+ ZeroMemory);
+ DAG.setRoot(Val);
+ setValue(&I, Val);
+ return;
+ }
+ case Intrinsic::ptrmask: {
+ SDValue Ptr = getValue(I.getOperand(0));
+ SDValue Const = getValue(I.getOperand(1));
+
+ EVT DestVT =
+ EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+
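+    // Lower llvm.ptrmask to a plain AND of the pointer with the mask,
+    // zero-extended or truncated to the pointer width first.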
+ setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr,
+ DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT)));
+ return;
+ }
+ }
+}
+
+void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
+ const ConstrainedFPIntrinsic &FPI) {
+ SDLoc sdl = getCurSDLoc();
+ unsigned Opcode;
+ switch (FPI.getIntrinsicID()) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::experimental_constrained_fadd:
+ Opcode = ISD::STRICT_FADD;
+ break;
+ case Intrinsic::experimental_constrained_fsub:
+ Opcode = ISD::STRICT_FSUB;
+ break;
+ case Intrinsic::experimental_constrained_fmul:
+ Opcode = ISD::STRICT_FMUL;
+ break;
+ case Intrinsic::experimental_constrained_fdiv:
+ Opcode = ISD::STRICT_FDIV;
+ break;
+ case Intrinsic::experimental_constrained_frem:
+ Opcode = ISD::STRICT_FREM;
+ break;
+ case Intrinsic::experimental_constrained_fma:
+ Opcode = ISD::STRICT_FMA;
+ break;
+ case Intrinsic::experimental_constrained_fptosi:
+ Opcode = ISD::STRICT_FP_TO_SINT;
+ break;
+ case Intrinsic::experimental_constrained_fptoui:
+ Opcode = ISD::STRICT_FP_TO_UINT;
+ break;
+ case Intrinsic::experimental_constrained_fptrunc:
+ Opcode = ISD::STRICT_FP_ROUND;
+ break;
+ case Intrinsic::experimental_constrained_fpext:
+ Opcode = ISD::STRICT_FP_EXTEND;
+ break;
+ case Intrinsic::experimental_constrained_sqrt:
+ Opcode = ISD::STRICT_FSQRT;
+ break;
+ case Intrinsic::experimental_constrained_pow:
+ Opcode = ISD::STRICT_FPOW;
+ break;
+ case Intrinsic::experimental_constrained_powi:
+ Opcode = ISD::STRICT_FPOWI;
+ break;
+ case Intrinsic::experimental_constrained_sin:
+ Opcode = ISD::STRICT_FSIN;
+ break;
+ case Intrinsic::experimental_constrained_cos:
+ Opcode = ISD::STRICT_FCOS;
+ break;
+ case Intrinsic::experimental_constrained_exp:
+ Opcode = ISD::STRICT_FEXP;
+ break;
+ case Intrinsic::experimental_constrained_exp2:
+ Opcode = ISD::STRICT_FEXP2;
+ break;
+ case Intrinsic::experimental_constrained_log:
+ Opcode = ISD::STRICT_FLOG;
+ break;
+ case Intrinsic::experimental_constrained_log10:
+ Opcode = ISD::STRICT_FLOG10;
+ break;
+ case Intrinsic::experimental_constrained_log2:
+ Opcode = ISD::STRICT_FLOG2;
+ break;
+ case Intrinsic::experimental_constrained_lrint:
+ Opcode = ISD::STRICT_LRINT;
+ break;
+ case Intrinsic::experimental_constrained_llrint:
+ Opcode = ISD::STRICT_LLRINT;
+ break;
+ case Intrinsic::experimental_constrained_rint:
+ Opcode = ISD::STRICT_FRINT;
+ break;
+ case Intrinsic::experimental_constrained_nearbyint:
+ Opcode = ISD::STRICT_FNEARBYINT;
+ break;
+ case Intrinsic::experimental_constrained_maxnum:
+ Opcode = ISD::STRICT_FMAXNUM;
+ break;
+ case Intrinsic::experimental_constrained_minnum:
+ Opcode = ISD::STRICT_FMINNUM;
+ break;
+ case Intrinsic::experimental_constrained_ceil:
+ Opcode = ISD::STRICT_FCEIL;
+ break;
+ case Intrinsic::experimental_constrained_floor:
+ Opcode = ISD::STRICT_FFLOOR;
+ break;
+ case Intrinsic::experimental_constrained_lround:
+ Opcode = ISD::STRICT_LROUND;
+ break;
+ case Intrinsic::experimental_constrained_llround:
+ Opcode = ISD::STRICT_LLROUND;
+ break;
+ case Intrinsic::experimental_constrained_round:
+ Opcode = ISD::STRICT_FROUND;
+ break;
+ case Intrinsic::experimental_constrained_trunc:
+ Opcode = ISD::STRICT_FTRUNC;
+ break;
+ }
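+  // Constrained FP nodes are chained: they consume the incoming chain and
+  // produce an out chain, so their rounding-mode and exception side effects
+  // keep their ordering relative to other chained operations.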
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Chain = getRoot();
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
+ ValueVTs.push_back(MVT::Other); // Out chain
+
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+ SDValue Result;
+ if (Opcode == ISD::STRICT_FP_ROUND)
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ DAG.getTargetConstant(0, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())) });
+ else if (FPI.isUnaryOp())
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)) });
+ else if (FPI.isTernaryOp())
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ getValue(FPI.getArgOperand(1)),
+ getValue(FPI.getArgOperand(2)) });
+ else
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ getValue(FPI.getArgOperand(1)) });
+
+ if (FPI.getExceptionBehavior() !=
+ ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
+ SDNodeFlags Flags;
+ Flags.setFPExcept(true);
+ Result->setFlags(Flags);
+ }
+
+ assert(Result.getNode()->getNumValues() == 2);
+ SDValue OutChain = Result.getValue(1);
+ DAG.setRoot(OutChain);
+ SDValue FPResult = Result.getValue(0);
+ setValue(&FPI, FPResult);
+}
+
+std::pair<SDValue, SDValue>
+SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineModuleInfo &MMI = MF.getMMI();
+ MCSymbol *BeginLabel = nullptr;
+
+ if (EHPadBB) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI.getContext().createTempSymbol();
+
+ // For SjLj, keep track of which landing pads go with which invokes
+ // so as to maintain the ordering of pads in the LSDA.
+ unsigned CallSiteIndex = MMI.getCurrentCallSite();
+ if (CallSiteIndex) {
+ MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
+
+ // Now that the call site is handled, stop tracking it.
+ MMI.setCurrentCallSite(0);
+ }
+
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel));
+
+ CLI.setChain(getRoot());
+ }
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+
+ assert((CLI.IsTailCall || Result.second.getNode()) &&
+ "Non-null chain expected with non-tail call!");
+ assert((Result.second.getNode() || !Result.first.getNode()) &&
+ "Null value expected with tail call!");
+
+ if (!Result.second.getNode()) {
+ // As a special case, a null chain means that a tail call has been emitted
+ // and the DAG root is already updated.
+ HasTailCall = true;
+
+ // Since there's no actual continuation from this block, nothing can be
+ // relying on us setting vregs for them.
+ PendingExports.clear();
+ } else {
+ DAG.setRoot(Result.second);
+ }
+
+ if (EHPadBB) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
+ DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
+
+ // Inform MachineModuleInfo of range.
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+    // Some platforms (e.g. wasm) use funclet-style IR but do not actually
+    // use outlined funclets or their style of LSDA info.
+ if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
+ assert(CLI.CS);
+ WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
+ EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
+ BeginLabel, EndLabel);
+ } else if (!isScopedEHPersonality(Pers)) {
+ MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+ }
+ }
+
+ return Result;
+}
+
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+ bool isTailCall,
+ const BasicBlock *EHPadBB) {
+ auto &DL = DAG.getDataLayout();
+ FunctionType *FTy = CS.getFunctionType();
+ Type *RetTy = CS.getType();
+
+ TargetLowering::ArgListTy Args;
+ Args.reserve(CS.arg_size());
+
+ const Value *SwiftErrorVal = nullptr;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // We can't tail call inside a function with a swifterror argument. Lowering
+ // does not support this yet. It would have to move into the swifterror
+ // register before the call.
+ auto *Caller = CS.getInstruction()->getParent()->getParent();
+ if (TLI.supportSwiftError() &&
+ Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ isTailCall = false;
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ TargetLowering::ArgListEntry Entry;
+ const Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+    Entry.Node = ArgNode;
+    Entry.Ty = V->getType();
+
+ Entry.setAttributes(&CS, i - CS.arg_begin());
+
+ // Use swifterror virtual register as input to the call.
+ if (Entry.IsSwiftError && TLI.supportSwiftError()) {
+ SwiftErrorVal = V;
+      // Find the virtual register for the actual swifterror argument and use
+      // it, rather than the Value, as the call input.
+ Entry.Node = DAG.getRegister(
+ SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V),
+ EVT(TLI.getPointerTy(DL)));
+ }
+
+ Args.push_back(Entry);
+
+ // If we have an explicit sret argument that is an Instruction, (i.e., it
+ // might point to function-local memory), we can't meaningfully tail-call.
+ if (Entry.IsSRet && isa<Instruction>(V))
+ isTailCall = false;
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI->LowerCallTo.
+ if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
+ isTailCall = false;
+
+  // Disable tail calls if there is a swifterror argument. Targets have not
+ // been updated to support tail calls.
+ if (TLI.supportSwiftError() && SwiftErrorVal)
+ isTailCall = false;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
+ .setTailCall(isTailCall)
+ .setConvergent(CS.isConvergent());
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
+
+ if (Result.first.getNode()) {
+ const Instruction *Inst = CS.getInstruction();
+ Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
+ setValue(Inst, Result.first);
+ }
+
+ // The last element of CLI.InVals has the SDValue for swifterror return.
+ // Here we copy it to a virtual register and update SwiftErrorMap for
+ // book-keeping.
+ if (SwiftErrorVal && TLI.supportSwiftError()) {
+ // Get the last element of InVals.
+ SDValue Src = CLI.InVals.back();
+ Register VReg = SwiftError.getOrCreateVRegDefAt(
+ CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
+ SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
+ DAG.setRoot(CopyNode);
+ }
+}
+
+static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
+ SelectionDAGBuilder &Builder) {
+ // Check to see if this load can be trivially constant folded, e.g. if the
+ // input is from a string literal.
+ if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+ // Cast pointer to the type we really want to load.
+ Type *LoadTy =
+ Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
+ if (LoadVT.isVector())
+ LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());
+
+ LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
+ PointerType::getUnqual(LoadTy));
+
+ if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
+ const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
+ return Builder.getValue(LoadCst);
+ }
+
+ // Otherwise, we have to emit the load. If the pointer is to unfoldable but
+ // still constant memory, the input chain can be the entry node.
+ SDValue Root;
+ bool ConstantMemory = false;
+
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
+ Root = Builder.DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = Builder.DAG.getRoot();
+ }
+
+ SDValue Ptr = Builder.getValue(PtrVal);
+ SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root,
+ Ptr, MachinePointerInfo(PtrVal),
+ /* Alignment = */ 1);
+
+ if (!ConstantMemory)
+ Builder.PendingLoads.push_back(LoadVal.getValue(1));
+ return LoadVal;
+}
+
+/// Record the value for an instruction that produces an integer result,
+/// converting the type where necessary.
+void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
+ SDValue Value,
+ bool IsSigned) {
+ EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType(), true);
+ if (IsSigned)
+ Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
+ else
+ Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
+ setValue(&I, Value);
+}
+
+/// See if we can lower a memcmp call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+ const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
+ const Value *Size = I.getArgOperand(2);
+ const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
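+  // memcmp(p, q, 0) is 0 for any pointers, so a constant zero size folds the
+  // call to a constant.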
+ if (CSize && CSize->getZExtValue() == 0) {
+ EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType(), true);
+ setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
+ return true;
+ }
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
+ DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
+ getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, true);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
+ // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
+ if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
+ return false;
+
+ // If the target has a fast compare for the given size, it will return a
+ // preferred load type for that size. Require that the load VT is legal and
+ // that the target supports unaligned loads of that type. Otherwise, return
+ // INVALID.
+ auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT LVT = TLI.hasFastEqualityCompare(NumBits);
+ if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
+ // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+ // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+ // TODO: Check alignment of src and dest ptrs.
+ unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+ unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ if (!TLI.isTypeLegal(LVT) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
+ LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
+ }
+
+ return LVT;
+ };
+
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+ MVT LoadVT;
+ unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
+ switch (NumBitsToCompare) {
+ default:
+ return false;
+ case 16:
+ LoadVT = MVT::i16;
+ break;
+ case 32:
+ LoadVT = MVT::i32;
+ break;
+ case 64:
+ case 128:
+ case 256:
+ LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
+ break;
+ }
+
+ if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return false;
+
+ SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
+ SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
+
+ // Bitcast to a wide integer type if the loads are vectors.
+ if (LoadVT.isVector()) {
+ EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
+ LoadL = DAG.getBitcast(CmpVT, LoadL);
+ LoadR = DAG.getBitcast(CmpVT, LoadR);
+ }
+
+ SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
+ processIntegerCallValue(I, Cmp, false);
+ return true;
+}
+
+/// See if we can lower a memchr call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
+ const Value *Src = I.getArgOperand(0);
+ const Value *Char = I.getArgOperand(1);
+ const Value *Length = I.getArgOperand(2);
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Src), getValue(Char), getValue(Length),
+ MachinePointerInfo(Src));
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can lower a mempcpy call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
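+  // mempcpy behaves like memcpy but returns Dst + Size instead of Dst.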
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Src = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+
+ unsigned DstAlign = DAG.InferPtrAlignment(Dst);
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ unsigned Align = std::min(DstAlign, SrcAlign);
+ if (Align == 0) // Alignment of one or both could not be inferred.
+ Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved.
+
+ bool isVol = false;
+ SDLoc sdl = getCurSDLoc();
+
+ // In the mempcpy context we need to pass in a false value for isTailCall
+ // because the return pointer needs to be adjusted by the size of
+ // the copied memory.
+ SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol,
+ false, /*isTailCall=*/false,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ assert(MC.getNode() != nullptr &&
+ "** memcpy should not be lowered as TailCall in mempcpy context **");
+ DAG.setRoot(MC);
+
+ // Check if Size needs to be truncated or extended.
+ Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
+
+ // Adjust return pointer to point just past the last dst byte.
+ SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
+ Dst, Size);
+ setValue(&I, DstPlusSize);
+ return true;
+}
+
+/// See if we can lower a strcpy call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0),
+ MachinePointerInfo(Arg1), isStpcpy);
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ DAG.setRoot(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can lower a strcmp call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0),
+ MachinePointerInfo(Arg1));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, true);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can lower a strlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
+ const Value *Arg0 = I.getArgOperand(0);
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), MachinePointerInfo(Arg0));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, false);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can lower a strnlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, false);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// See if we can lower a unary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it, otherwise return
+/// false and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp));
+ return true;
+}
+
+/// See if we can lower a binary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it. Otherwise return
+/// false, and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
+bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp0 = getValue(I.getArgOperand(0));
+ SDValue Tmp1 = getValue(I.getArgOperand(1));
+ EVT VT = Tmp0.getValueType();
+ setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1));
+ return true;
+}
+
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
+ // Handle inline assembly differently.
+ if (isa<InlineAsm>(I.getCalledValue())) {
+ visitInlineAsm(&I);
+ return;
+ }
+
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration()) {
+ // Is this an LLVM intrinsic or a target-specific intrinsic?
+ unsigned IID = F->getIntrinsicID();
+ if (!IID)
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
+ IID = II->getIntrinsicID(F);
+
+ if (IID) {
+ visitIntrinsicCall(I, IID);
+ return;
+ }
+ }
+
+ // Check for well-known libc/libm calls. If the function is internal, it
+ // can't be a library call. Don't do the check if marked as nobuiltin for
+ // some reason or the call site requires strict floating point semantics.
+ LibFunc Func;
+ if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
+ F->hasName() && LibInfo->getLibFunc(*F, Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ switch (Func) {
+ default: break;
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ // We already checked this call's prototype; verify it doesn't modify
+ // errno.
+ if (I.onlyReadsMemory()) {
+ SDValue LHS = getValue(I.getArgOperand(0));
+ SDValue RHS = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
+ LHS.getValueType(), LHS, RHS));
+ return;
+ }
+ break;
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
+ if (visitUnaryFloatCall(I, ISD::FABS))
+ return;
+ break;
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
+ if (visitBinaryFloatCall(I, ISD::FMINNUM))
+ return;
+ break;
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
+ if (visitBinaryFloatCall(I, ISD::FMAXNUM))
+ return;
+ break;
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
+ if (visitUnaryFloatCall(I, ISD::FSIN))
+ return;
+ break;
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
+ if (visitUnaryFloatCall(I, ISD::FCOS))
+ return;
+ break;
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_sqrt_finite:
+ case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl_finite:
+ if (visitUnaryFloatCall(I, ISD::FSQRT))
+ return;
+ break;
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
+ if (visitUnaryFloatCall(I, ISD::FFLOOR))
+ return;
+ break;
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
+ if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
+ return;
+ break;
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
+ if (visitUnaryFloatCall(I, ISD::FCEIL))
+ return;
+ break;
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
+ if (visitUnaryFloatCall(I, ISD::FRINT))
+ return;
+ break;
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
+ if (visitUnaryFloatCall(I, ISD::FROUND))
+ return;
+ break;
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
+ if (visitUnaryFloatCall(I, ISD::FTRUNC))
+ return;
+ break;
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
+ if (visitUnaryFloatCall(I, ISD::FLOG2))
+ return;
+ break;
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
+ if (visitUnaryFloatCall(I, ISD::FEXP2))
+ return;
+ break;
+ case LibFunc_memcmp:
+ if (visitMemCmpCall(I))
+ return;
+ break;
+ case LibFunc_mempcpy:
+ if (visitMemPCpyCall(I))
+ return;
+ break;
+ case LibFunc_memchr:
+ if (visitMemChrCall(I))
+ return;
+ break;
+ case LibFunc_strcpy:
+ if (visitStrCpyCall(I, false))
+ return;
+ break;
+ case LibFunc_stpcpy:
+ if (visitStrCpyCall(I, true))
+ return;
+ break;
+ case LibFunc_strcmp:
+ if (visitStrCmpCall(I))
+ return;
+ break;
+ case LibFunc_strlen:
+ if (visitStrLenCall(I))
+ return;
+ break;
+ case LibFunc_strnlen:
+ if (visitStrNLenCall(I))
+ return;
+ break;
+ }
+ }
+ }
+
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ "Cannot lower calls with arbitrary operand bundles!");
+
+ SDValue Callee = getValue(I.getCalledValue());
+
+ if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
+ LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
+ else
+ // Check if we can potentially perform a tail call. More detailed checking
+    // is done within LowerCallTo, after more information about the call is
+ // known.
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+namespace {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+ /// CallOperand - If this is the result output operand or a clobber
+ /// this is null, otherwise it is the incoming operand to the CallInst.
+ /// This gets modified as the asm is processed.
+ SDValue CallOperand;
+
+ /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+ : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
+ }
+
+ /// Whether or not this operand accesses memory
+ bool hasMemory(const TargetLowering &TLI) const {
+ // Indirect operand accesses access memory.
+ if (isIndirect)
+ return true;
+
+ for (const auto &Code : Codes)
+ if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
+ return true;
+
+ return false;
+ }
+
+ /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+ /// corresponds to. If there is no Value* for this operand, it returns
+ /// MVT::Other.
+ EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL) const {
+ if (!CallOperandVal) return MVT::Other;
+
+ if (isa<BasicBlock>(CallOperandVal))
+ return TLI.getPointerTy(DL);
+
+ llvm::Type *OpTy = CallOperandVal->getType();
+
+ // FIXME: code duplicated from TargetLowering::ParseConstraints().
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (isIndirect) {
+ PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = DL.getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpTy = IntegerType::get(Context, BitSize);
+ break;
+ }
+ }
+
+ return TLI.getValueType(DL, OpTy, true);
+ }
+};
+
+using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>;
+
+} // end anonymous namespace
+
+/// Make sure that the output operand \p OpInfo and its corresponding input
+/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
+/// out).
+static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &MatchingOpInfo,
+ SelectionDAG &DAG) {
+ if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
+ return;
+
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
+ MatchingOpInfo.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ MatchingOpInfo.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ // FIXME: error out in a more elegant fashion
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
+}
+
+/// Get a direct memory input to behave well as an indirect operand.
+/// This may introduce stores, hence the need for a \p Chain.
+/// \return The (possibly updated) chain.
+static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
+ SDISelAsmOperandInfo &OpInfo,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // If we don't have an indirect input, put it in the constpool if we can,
+ // otherwise spill it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ const Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(
+ cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
+ return Chain;
+ }
+
+ // Otherwise, create a stack slot and emit a store to it before the asm.
+ Type *Ty = OpVal->getType();
+ auto &DL = DAG.getDataLayout();
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
+ unsigned Align = DL.getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
+ Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(MF, SSFI),
+ TLI.getMemValueType(DL, Ty));
+ OpInfo.CallOperand = StackSlot;
+
+ return Chain;
+}
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand
+/// RefOpInfo describes the matching operand if any, the operand otherwise
+static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
+ SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &RefOpInfo) {
+ LLVMContext &Context = *DAG.getContext();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<unsigned, 4> Regs;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ // No work to do for memory operations.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory)
+ return;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ unsigned AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
+ &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
+ // RC is unset only on failure. Return immediately.
+ if (!RC)
+ return;
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
+
+ if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is an FP operand in an integer register (or vice versa), or more
+ // generally if the operand value disagrees with the register class we plan
+ // to stick it in, fix the operand type.
+ //
+ // If this is an input value, the bitcast to the new type is done now.
+ // Bitcast for output value is done at the end of visitInlineAsm().
+ if ((OpInfo.Type == InlineAsm::isOutput ||
+ OpInfo.Type == InlineAsm::isInput) &&
+ !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
+ // Try to convert to the first EVT that the reg class contains. If the
+ // types are identical size, use a bitcast to convert (e.g. two differing
+ // vector types). Note: output bitcast is done at the end of
+ // visitInlineAsm().
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ // Exclude indirect inputs while they are unsupported because the code
+ // to perform the load is missing and thus OpInfo.CallOperand still
+ // refers to the input address rather than the pointed-to value.
+ if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
+ OpInfo.CallOperand =
+ DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ // If the operand is an FP value and we want it in integer registers,
+ // use the corresponding integer type. This turns an f64 value into
+ // i64, which can be passed with two i32 values on a 32-bit machine.
+ } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+ MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ if (OpInfo.Type == InlineAsm::isInput)
+ OpInfo.CallOperand =
+ DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = VT;
+ }
+ }
+ }
+
+ // No need to allocate a matching input constraint since the constraint it's
+ // matching to has already been allocated.
+ if (OpInfo.isMatchingInputConstraint())
+ return;
+
+ EVT ValueVT = OpInfo.ConstraintVT;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Initialize NumRegs.
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other)
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+
+  // If this is associated with a specific register, initialize the iterator
+  // to the correct place. If virtual, make sure we have enough registers.
+
+  // Initialize the iterator if necessary.
+ TargetRegisterClass::iterator I = RC->begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+  // If a specific physical register was requested, advance the iterator to
+  // it; it must be a member of the register class.
+ if (AssignedReg) {
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "AssignedReg should be member of RC");
+ }
+
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
+ Regs.push_back(R);
+ }
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+}
+
+static unsigned
+findMatchingInlineAsmOperand(unsigned OperandNo,
+ const std::vector<SDValue> &AsmNodeOperands) {
+ // Scan until we find the definition we already emitted of this operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) &&
+ "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
+ }
+ return CurOp;
+}
+
+namespace {
+
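+/// Accumulates the extra operand flags (side effects, alignstack, asm
+/// dialect, may-load/may-store) that get encoded on the INLINEASM node.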
+class ExtraFlags {
+ unsigned Flags = 0;
+
+public:
+ explicit ExtraFlags(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ if (IA->hasSideEffects())
+ Flags |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ Flags |= InlineAsm::Extra_IsAlignStack;
+ if (CS.isConvergent())
+ Flags |= InlineAsm::Extra_IsConvergent;
+ Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+ }
+
+ void update(const TargetLowering::AsmOperandInfo &OpInfo) {
+ // Ideally, we would only check against memory constraints. However, the
+ // meaning of an Other constraint can be target-specific and we can't easily
+ // reason about it. Therefore, be conservative and set MayLoad/MayStore
+ // for Other constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ Flags |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ Flags |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+ }
+ }
+
+ unsigned get() const { return Flags; }
+};
+
+} // end anonymous namespace
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ SDISelAsmOperandInfoVector ConstraintOperands;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
+ DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
+
+ // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
+ // AsmDialect, MayLoad, MayStore).
+ bool HasSideEffect = IA->hasSideEffects();
+ ExtraFlags ExtraInfo(CS);
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+ for (auto &T : TargetConstraints) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Compute the value type for each operand.
+ if (OpInfo.Type == InlineAsm::isInput ||
+ (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+
+ // Process the call argument. BasicBlocks are labels, currently appearing
+      // only in asms.
+ const Instruction *I = CS.getInstruction();
+ if (isa<CallBrInst>(I) &&
+ (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() -
+ cast<CallBrInst>(I)->getNumIndirectDests())) {
+ const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
+ OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
+ } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ }
+
+ OpInfo.ConstraintVT =
+ OpInfo
+ .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
+ .getSimpleVT();
+ } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT = TLI.getSimpleValueType(
+ DAG.getDataLayout(), STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT =
+ TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
+ }
+ ++ResNo;
+ } else {
+ OpInfo.ConstraintVT = MVT::Other;
+ }
+
+ if (!HasSideEffect)
+ HasSideEffect = OpInfo.hasMemory(TLI);
+
+ // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+ // FIXME: Could we compute this on OpInfo rather than T?
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(T, SDValue());
+
+ if (T.ConstraintType == TargetLowering::C_Immediate &&
+ OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
+      // We've delayed emitting a diagnostic like we do for the "n"
+      // constraint because inlining could cause an integer constant to show
+      // up.
+ return emitInlineAsmError(
+ CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
+ "integer constant expression");
+
+ ExtraInfo.update(T);
+ }
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
+
+ bool IsCallBr = isa<CallBrInst>(CS.getInstruction());
+ if (IsCallBr) {
+ // If this is a callbr we need to flush pending exports since inlineasm_br
+ // is a terminator. We need to do this before nodes are glued to
+ // the inlineasm_br node.
+ Chain = getControlRoot();
+ }
+
+ // Second pass over the constraints: compute which constraint option to use.
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ patchMatchingInput(OpInfo, Input, DAG);
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is a memory input, and if the operand is not indirect, do what we
+ // need to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert((OpInfo.isMultipleAlternative ||
+ (OpInfo.Type == InlineAsm::isInput)) &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value.
+ Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = nullptr;
+
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+  }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
+ IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout())));
+
+ // If we have a !srcloc metadata node associated with it, we want to attach
+ // this to the ultimately generated inline asm machineinstr. To do this, we
+ // pass in the third operand as this (potentially null) inline asm MDNode.
+ const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+ AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+ // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+ // bits as operand 3.
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+
+  // Third pass: Loop over operands to prepare DAG-level operands. As part of
+  // this, assign virtual and physical registers for inputs and outputs.
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ // Assign Registers.
+ SDISelAsmOperandInfo &RefOpInfo =
+ OpInfo.isMatchingInputConstraint()
+ ? ConstraintOperands[OpInfo.getMatchedOperand()]
+ : OpInfo;
+ GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
+ OpInfo.isIndirect)) {
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
+ MVT::i32));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
+ !OpInfo.isIndirect) ||
+ OpInfo.ConstraintType == TargetLowering::C_Register ||
+ OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
+ // Otherwise, this outputs to a register (directly for C_Register /
+ // C_RegisterClass, and a target-defined fashion for
+ // C_Immediate/C_Other). Find a register that we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ emitInlineAsmError(
+ CS, "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(
+ OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
+ : InlineAsm::Kind_RegDef,
+ false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
+ }
+ break;
+
+ case InlineAsm::isInput: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) {
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
+ AsmNodeOperands);
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if (InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ if (OpInfo.isIndirect) {
+ // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+ emitInlineAsmError(CS, "inline asm not supported yet:"
+ " don't know how to handle tied "
+ "indirect register inputs");
+ return;
+ }
+
+ MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
+ SmallVector<unsigned, 4> Regs;
+
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) {
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
+ MachineRegisterInfo &RegInfo =
+ DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+ } else {
+ emitInlineAsmError(CS, "inline asm error: This value type register "
+ "class is not natively supported!");
+ return;
+ }
+
+ RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
+
+ SDLoc dl = getCurSDLoc();
+          // Use the produced MatchedRegs object to copy the input value into
+          // the matched registers.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+ CS.getInstruction());
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+ true, OpInfo.getMatchedOperand(), dl,
+ DAG, AsmNodeOperands);
+ break;
+ }
+
+ assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+ assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
+ OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+ OpInfo.getMatchedOperand());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+
+ // Treat indirect 'X' constraint as memory.
+ if ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
+ OpInfo.isIndirect)
+ OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ Ops, DAG);
+ if (Ops.empty()) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
+ if (isa<ConstantSDNode>(InOperandVal)) {
+ emitInlineAsmError(CS, "value out of range for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() ==
+ TLI.getPointerTy(DAG.getDataLayout()) &&
+ "Memory operands expect pointer values");
+
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ getCurSDLoc(),
+ MVT::i32));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register ||
+ OpInfo.ConstraintType == TargetLowering::C_Immediate) &&
+ "Unknown constraint type!");
+
+ // TODO: Support this.
+ if (OpInfo.isIndirect) {
+ emitInlineAsmError(
+ CS, "Don't know how to handle indirect register inputs yet "
+ "for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ SDLoc dl = getCurSDLoc();
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl,
+ Chain, &Flag, CS.getInstruction());
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
+ dl, DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber:
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
+ false, 0, getCurSDLoc(), DAG,
+ AsmNodeOperands);
+ break;
+ }
+ }
+
+ // Finish up input operands. Set the input chain and add the flag last.
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+ unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
+ Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
+ DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
+ Flag = Chain.getValue(1);
+
+ // Do additional work to generate outputs.
+
+ SmallVector<EVT, 1> ResultVTs;
+ SmallVector<SDValue, 1> ResultValues;
+ SmallVector<SDValue, 8> OutChains;
+
+ llvm::Type *CSResultType = CS.getType();
+ ArrayRef<Type *> ResultTypes;
+ if (StructType *StructResult = dyn_cast<StructType>(CSResultType))
+ ResultTypes = StructResult->elements();
+ else if (!CSResultType->isVoidTy())
+ ResultTypes = makeArrayRef(CSResultType);
+
+ auto CurResultType = ResultTypes.begin();
+ auto handleRegAssign = [&](SDValue V) {
+ assert(CurResultType != ResultTypes.end() && "Unexpected value");
+ assert((*CurResultType)->isSized() && "Unexpected unsized type");
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
+ ++CurResultType;
+    // If the type of the inline asm call site return value is different from,
+    // but has the same size as, the type of the asm output, bitcast it. One
+    // example of this is vectors with a different width / number of elements.
+    // This can
+ // happen for register classes that can contain multiple different value
+ // types. The preg or vreg allocated may not have the same VT as was
+ // expected.
+ //
+ // This can also happen for a return value that disagrees with the register
+    // class it is put in, e.g. a double in a general-purpose register on a
+ // 32-bit machine.
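+    //
+    // For example (illustrative): a value produced as v4i32 in a 128-bit
+    // register but expected as v2i64 is bitcast, while an i32 result tied to
+    // an i64 input is truncated down to i32.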
+ if (ResultVT != V.getValueType() &&
+ ResultVT.getSizeInBits() == V.getValueSizeInBits())
+ V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
+ else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
+ V.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result
+ // may have a wider width than the expected result. Extract the
+ // relevant portion.
+ V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
+ }
+ assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
+ ResultVTs.push_back(ResultVT);
+ ResultValues.push_back(V);
+ };
+
+ // Deal with output operands.
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ if (OpInfo.Type == InlineAsm::isOutput) {
+ SDValue Val;
+ // Skip trivial output operands.
+ if (OpInfo.AssignedRegs.Regs.empty())
+ continue;
+
+ switch (OpInfo.ConstraintType) {
+ case TargetLowering::C_Register:
+ case TargetLowering::C_RegisterClass:
+ Val = OpInfo.AssignedRegs.getCopyFromRegs(
+ DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
+ break;
+ case TargetLowering::C_Immediate:
+ case TargetLowering::C_Other:
+ Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+ OpInfo, DAG);
+ break;
+ case TargetLowering::C_Memory:
+ break; // Already handled.
+ case TargetLowering::C_Unknown:
+ assert(false && "Unexpected unknown constraint");
+ }
+
+      // Indirect outputs manifest as stores. Record output chains.
+ if (OpInfo.isIndirect) {
+ const Value *Ptr = OpInfo.CallOperandVal;
+ assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
+ SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
+ MachinePointerInfo(Ptr));
+ OutChains.push_back(Store);
+ } else {
+        // Generate CopyFromRegs to the associated registers.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ if (Val.getOpcode() == ISD::MERGE_VALUES) {
+ for (const SDValue &V : Val->op_values())
+ handleRegAssign(V);
+ } else
+ handleRegAssign(Val);
+ }
+ }
+ }
+
+ // Set results.
+ if (!ResultValues.empty()) {
+ assert(CurResultType == ResultTypes.end() &&
+ "Mismatch in number of ResultTypes");
+ assert(ResultValues.size() == ResultTypes.size() &&
+ "Mismatch in number of output operands in asm result");
+
+ SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ResultVTs), ResultValues);
+ setValue(CS.getInstruction(), V);
+ }
+
+ // Collect store chains.
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
+
+  // Only update the root if the inline assembly has a memory effect.
+ if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr)
+ DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
+ const Twine &Message) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), Message);
+
+ // Make sure we leave the DAG in a valid state
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
+
+ if (ValueVTs.empty())
+ return;
+
+ SmallVector<SDValue, 1> Ops;
+ for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
+
+ setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc()));
+}
+
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ SDValue V = DAG.getVAArg(
+ TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
+ getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
+ DL.getABITypeAlignment(I.getType()));
+ DAG.setRoot(V.getValue(1));
+
+ if (I.getType()->isPointerTy())
+ V = DAG.getPtrExtOrTrunc(
+ V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
+ setValue(&I, V);
+}
+
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getSrcValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(1))));
+}
+
+SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
+ const Instruction &I,
+ SDValue Op) {
+ const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
+ if (!Range)
+ return Op;
+
+ ConstantRange CR = getConstantRangeFromMetadata(*Range);
+ if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
+ return Op;
+
+ APInt Lo = CR.getUnsignedMin();
+ if (!Lo.isMinValue())
+ return Op;
+
+ APInt Hi = CR.getUnsignedMax();
+ unsigned Bits = std::max(Hi.getActiveBits(),
+ static_cast<unsigned>(IntegerType::MIN_INT_BITS));
+
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
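+  // For example, !range !{i64 0, i64 200} gives Hi = 199, which has 8 active
+  // bits, so SmallVT is i8 and the AssertZext below asserts that the upper
+  // bits are zero.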
+
+ SDLoc SL = getCurSDLoc();
+
+ SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
+ DAG.getValueType(SmallVT));
+ unsigned NumVals = Op.getNode()->getNumValues();
+ if (NumVals == 1)
+ return ZExt;
+
+ SmallVector<SDValue, 4> Ops;
+
+ Ops.push_back(ZExt);
+ for (unsigned I = 1; I != NumVals; ++I)
+ Ops.push_back(Op.getValue(I));
+
+ return DAG.getMergeValues(Ops, SL);
+}
+
+/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
+/// the call being lowered.
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+void SelectionDAGBuilder::populateCallLoweringInfo(
+ TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
+ unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
+ bool IsPatchPoint) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
+ ArgI != ArgE; ++ArgI) {
+ const Value *V = Call->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = getValue(V);
+ Entry.Ty = V->getType();
+ Entry.setAttributes(Call, ArgI);
+ Args.push_back(Entry);
+ }
+
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
+ .setDiscardResult(Call->use_empty())
+ .setIsPatchPoint(IsPatchPoint);
+}
+
+/// Add a stack map intrinsic call's live variable operands to a stackmap
+/// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
+/// generate address computation nodes, and so FinalizeISel can convert the
+/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
+/// only available in a register, then the runtime would need to trap when
+/// execution reaches the StackMap in order to read the alloca's location.
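+///
+/// For illustration, a constant live operand such as i64 42 is encoded below
+/// as the pair (StackMaps::ConstantOp, 42) instead of being materialized in
+/// a register.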
+static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
+ const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder &Builder) {
+ for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
+ SDValue OpVal = Builder.getValue(CS.getArgument(i));
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
+ const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(
+ FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
+ } else
+ Ops.push_back(OpVal);
+ }
+}
+
+/// Lower llvm.experimental.stackmap directly to its target opcode.
+void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
+  // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+ // [live variables...])
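+  //
+  // For illustration (example IR, not from this file):
+  //   call void @llvm.experimental.stackmap(i64 42, i32 8, i32 %x)
+  // records %x in the stack map and reserves 8 bytes of shadow NOPs.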
+
+ assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
+
+ SDValue Chain, InFlag, Callee, NullPtr;
+ SmallVector<SDValue, 32> Ops;
+
+ SDLoc DL = getCurSDLoc();
+ Callee = getValue(CI.getCalledValue());
+ NullPtr = DAG.getIntPtrConstant(0, DL, true);
+
+  // The stackmap intrinsic only records the live variables (the arguments
+  // passed to it) and emits NOPs (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // chain, flag = CALLSEQ_START(chain, 0, 0)
+ // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
+ // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
+ //
+ Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
+ InFlag = Chain.getValue(1);
+
+ // Add the <id> and <numBytes> constants.
+ SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
+ SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
+ MVT::i32));
+
+ // Push live variables for the stack map.
+ addStackMapLiveVars(&CI, 2, DL, Ops, *this);
+
+ // We are not pushing any register mask info here on the operands list,
+ // because the stackmap doesn't clobber anything.
+
+ // Push the chain and the glue flag.
+ Ops.push_back(Chain);
+ Ops.push_back(InFlag);
+
+ // Create the STACKMAP node.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
+ Chain = SDValue(SM, 0);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
+
+ // Stackmaps don't generate values, so nothing goes into the NodeMap.
+
+ // Set the root to the target-lowered call chain.
+ DAG.setRoot(Chain);
+
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo().setHasStackMap();
+}
+
+/// Lower llvm.experimental.patchpoint directly to its target opcode.
+void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
+ const BasicBlock *EHPadBB) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
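+  //
+  // For illustration (example IR, not from this file):
+  //   call void @llvm.experimental.patchpoint.void(i64 7, i32 16, i8* %target,
+  //                                                i32 2, i32 %a, i32 %b)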
+
+ CallingConv::ID CC = CS.getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !CS->getType()->isVoidTy();
+ SDLoc dl = getCurSDLoc();
+ SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
+
+ // Handle immediate and symbolic callees.
+ if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
+ Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
+ /*isTarget=*/true);
+ else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
+ SDLoc(SymbolicCallee),
+ SymbolicCallee->getValueType(0));
+
+ // Get the real number of arguments participating in the call <numArgs>
+ SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
+ unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ // Intrinsics include all meta-operands up to but not including CC.
+ unsigned NumMetaOpers = PatchPointOpers::CCPos;
+ assert(CS.arg_size() >= NumMetaOpers + NumArgs &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
+ Type *ReturnTy =
+ IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()),
+ NumMetaOpers, NumCallArgs, Callee, ReturnTy, true);
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
+
+ SDNode *CallEnd = Result.second.getNode();
+ if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
+ CallEnd = CallEnd->getOperand(0).getNode();
+
+  // Get a call instruction from the call sequence chain.
+  // Tail calls are not allowed.
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+ "Expected a callseq node.");
+ SDNode *Call = CallEnd->getOperand(0).getNode();
+ bool HasGlue = Call->getGluedNode();
+
+ // Replace the target specific call node with the patchable intrinsic.
+ SmallVector<SDValue, 8> Ops;
+
+ // Add the <id> and <numBytes> constants.
+ SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
+ SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
+ MVT::i32));
+
+ // Add the callee.
+ Ops.push_back(Callee);
+
+ // Adjust <numArgs> to account for any arguments that have been passed on the
+ // stack instead.
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
+ NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
+
+ // Add the calling convention
+ Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (IsAnyRegCC)
+ for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
+ Ops.push_back(getValue(CS.getArgument(i)));
+
+ // Push the arguments from the call instruction up to the register mask.
+ SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
+ Ops.append(Call->op_begin() + 2, e);
+
+ // Push live variables for the stack map.
+ addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this);
+
+ // Push the register mask info.
+ if (HasGlue)
+ Ops.push_back(*(Call->op_end()-2));
+ else
+ Ops.push_back(*(Call->op_end()-1));
+
+  // Push the chain (this is originally the first operand of the call, but
+  // now becomes the last or second-to-last operand).
+ Ops.push_back(*(Call->op_begin()));
+
+ // Push the glue flag (last operand).
+ if (HasGlue)
+ Ops.push_back(*(Call->op_end()-1));
+
+ SDVTList NodeTys;
+ if (IsAnyRegCC && HasDef) {
+ // Create the return types based on the intrinsic definition
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 3> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
+ assert(ValueVTs.size() == 1 && "Expected only one return value type.");
+
+ // There is always a chain and a glue type at the end
+ ValueVTs.push_back(MVT::Other);
+ ValueVTs.push_back(MVT::Glue);
+ NodeTys = DAG.getVTList(ValueVTs);
+ } else
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ // Replace the target specific call node with a PATCHPOINT node.
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
+ dl, NodeTys, Ops);
+
+ // Update the NodeMap.
+ if (HasDef) {
+ if (IsAnyRegCC)
+ setValue(CS.getInstruction(), SDValue(MN, 0));
+ else
+ setValue(CS.getInstruction(), Result.first);
+ }
+
+ // Fixup the consumers of the intrinsic. The chain and glue may be used in the
+ // call sequence. Furthermore the location of the chain and glue can change
+ // when the AnyReg calling convention is used and the intrinsic returns a
+ // value.
+ if (IsAnyRegCC && HasDef) {
+ SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
+ SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ } else
+ DAG.ReplaceAllUsesWith(Call, MN);
+ DAG.DeleteNode(Call);
+
+ // Inform the Frame Information that we have a patchpoint in this function.
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint();
+}
+
+void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
+ unsigned Intrinsic) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2;
+ if (I.getNumArgOperands() > 1)
+ Op2 = getValue(I.getArgOperand(1));
+ SDLoc dl = getCurSDLoc();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue Res;
+ FastMathFlags FMF;
+ if (isa<FPMathOperator>(I))
+ FMF = I.getFastMathFlags();
+
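+  // Note (illustrative): for the v2 FP reductions below, Op1 is the scalar
+  // start value and Op2 is the vector operand, e.g.
+  //   call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(
+  //       float %acc, <4 x float> %v)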
+ switch (Intrinsic) {
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ if (FMF.allowReassoc())
+ Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
+ DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
+ else
+ Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
+ break;
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ if (FMF.allowReassoc())
+ Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
+ DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
+ else
+ Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
+ break;
+ case Intrinsic::experimental_vector_reduce_add:
+ Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_mul:
+ Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_and:
+ Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_or:
+ Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_xor:
+ Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_smax:
+ Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_smin:
+ Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_umax:
+ Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_umin:
+ Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_fmax:
+ Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_fmin:
+ Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1);
+ break;
+ default:
+ llvm_unreachable("Unhandled vector reduce intrinsic");
+ }
+ setValue(&I, Res);
+}
+
+/// Returns an AttributeList representing the attributes applied to the return
+/// value of the given call.
+static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+ SmallVector<Attribute::AttrKind, 2> Attrs;
+ if (CLI.RetSExt)
+ Attrs.push_back(Attribute::SExt);
+ if (CLI.RetZExt)
+ Attrs.push_back(Attribute::ZExt);
+ if (CLI.IsInReg)
+ Attrs.push_back(Attribute::InReg);
+
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
+}
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
+ // Handle the incoming return values from the call.
+ CLI.Ins.clear();
+ Type *OrigRetTy = CLI.RetTy;
+ SmallVector<EVT, 4> RetTys;
+ SmallVector<uint64_t, 4> Offsets;
+ auto &DL = CLI.DAG.getDataLayout();
+ ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
+
+ if (CLI.IsPostTypeLegalization) {
+ // If we are lowering a libcall after legalization, split the return type.
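+    // For example (illustrative): an i64 libcall result on a 32-bit target
+    // is split here into two i32 registers at byte offsets 0 and 4.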
+ SmallVector<EVT, 4> OldRetTys;
+ SmallVector<uint64_t, 4> OldOffsets;
+ RetTys.swap(OldRetTys);
+ Offsets.swap(OldOffsets);
+
+ for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
+ EVT RetVT = OldRetTys[i];
+ uint64_t Offset = OldOffsets[i];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
+ unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
+ RetTys.append(NumRegs, RegisterVT);
+ for (unsigned j = 0; j != NumRegs; ++j)
+ Offsets.push_back(Offset + j * RegisterVTByteSZ);
+ }
+ }
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
+
+ bool CanLowerReturn =
+ this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
+ CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
+ SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
+ if (!CanLowerReturn) {
+ // FIXME: equivalent assert?
+ // assert(!CS.hasInAllocaArgument() &&
+ // "sret demotion is incompatible with inalloca");
+ uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
+ unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
+ MachineFunction &MF = CLI.DAG.getMachineFunction();
+ DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
+ Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
+ DL.getAllocaAddrSpace());
+
+ DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
+ ArgListEntry Entry;
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Entry.IsInReg = false;
+ Entry.IsSRet = true;
+ Entry.IsNest = false;
+ Entry.IsByVal = false;
+ Entry.IsReturned = false;
+ Entry.IsSwiftSelf = false;
+ Entry.IsSwiftError = false;
+ Entry.Alignment = Align;
+ CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
+ CLI.NumFixedArgs += 1;
+ CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
+
+ // sret demotion isn't compatible with tail-calls, since the sret argument
+ // points into the callers stack frame.
+ CLI.IsTailCall = false;
+ } else {
+ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+ CLI.RetTy, CLI.CallConv, CLI.IsVarArg);
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ ISD::ArgFlagsTy Flags;
+ if (NeedsRegBlock) {
+ Flags.setInConsecutiveRegs();
+ if (I == RetTys.size() - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.Flags = Flags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetTy->isPointerTy()) {
+ MyFlags.Flags.setPointer();
+ MyFlags.Flags.setPointerAddrSpace(
+ cast<PointerType>(CLI.RetTy)->getAddressSpace());
+ }
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // We push in swifterror return as the last element of CLI.Ins.
+ ArgListTy &Args = CLI.getArgs();
+ if (supportSwiftError()) {
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ if (Args[i].IsSwiftError) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = getPointerTy(DL);
+ MyFlags.ArgVT = EVT(getPointerTy(DL));
+ MyFlags.Flags.setSwiftError();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // Handle all of the outgoing arguments.
+ CLI.Outs.clear();
+ CLI.OutVals.clear();
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
+ // FIXME: Split arguments if CLI.IsPostTypeLegalization
+ Type *FinalType = Args[i].Ty;
+ if (Args[i].IsByVal)
+ FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
+ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+ FinalType, CLI.CallConv, CLI.IsVarArg);
+ for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
+ ++Value) {
+ EVT VT = ValueVTs[Value];
+ Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
+ SDValue Op = SDValue(Args[i].Node.getNode(),
+ Args[i].Node.getResNo() + Value);
+ ISD::ArgFlagsTy Flags;
+
+ // Certain targets (such as MIPS), may have a different ABI alignment
+ // for a type depending on the context. Give the target a chance to
+ // specify the alignment it wants.
+ const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
+
+ if (Args[i].Ty->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(Args[i].Ty)->getAddressSpace());
+ }
+ if (Args[i].IsZExt)
+ Flags.setZExt();
+ if (Args[i].IsSExt)
+ Flags.setSExt();
+ if (Args[i].IsInReg) {
+        // If we are using the vectorcall calling convention, a structure that
+        // is passed InReg must be an HVA (Homogeneous Vector Aggregate).
+ if (CLI.CallConv == CallingConv::X86_VectorCall &&
+ isa<StructType>(FinalType)) {
+          // The first value of a structure is marked with HvaStart.
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set InReg Flag
+ Flags.setInReg();
+ }
+ if (Args[i].IsSRet)
+ Flags.setSRet();
+ if (Args[i].IsSwiftSelf)
+ Flags.setSwiftSelf();
+ if (Args[i].IsSwiftError)
+ Flags.setSwiftError();
+ if (Args[i].IsByVal)
+ Flags.setByVal();
+ if (Args[i].IsInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Args[i].IsByVal || Args[i].IsInAlloca) {
+ PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ Type *ElementTy = Ty->getElementType();
+
+ unsigned FrameSize = DL.getTypeAllocSize(
+ Args[i].ByValType ? Args[i].ByValType : ElementTy);
+ Flags.setByValSize(FrameSize);
+
+        // For ByVal, size and alignment should be passed from FE. BE will
+        // guess if this info is not there but there are cases it cannot get
+        // right.
+ unsigned FrameAlign;
+ if (Args[i].Alignment)
+ FrameAlign = Args[i].Alignment;
+ else
+ FrameAlign = getByValTypeAlignment(ElementTy, DL);
+ Flags.setByValAlign(Align(FrameAlign));
+ }
+ if (Args[i].IsNest)
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (Args[i].IsSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].IsZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // Conservatively only handle 'returned' on non-vectors that can be lowered,
+ // for now.
+ if (Args[i].IsReturned && !Op.getValueType().isVector() &&
+ CanLowerReturn) {
+ assert((CLI.RetTy == Args[i].Ty ||
+ (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
+ CLI.RetTy->getPointerAddressSpace() ==
+ Args[i].Ty->getPointerAddressSpace())) &&
+ RetTys.size() == NumValues && "unexpected use of 'returned'");
+ // Before passing 'returned' to the target lowering code, ensure that
+ // either the register MVT and the actual EVT are the same size or that
+ // the return value and argument are extended in the same way; in these
+ // cases it's safe to pass the argument register value unchanged as the
+ // return register value (although it's at the target's option whether
+ // to do so)
+ // TODO: allow code generation to take advantage of partially preserved
+ // registers rather than clobbering the entire register when the
+ // parameter extension method is not compatible with the return
+ // extension method
+ if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
+ (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
+ CLI.RetZExt == Args[i].IsZExt))
+ Flags.setReturned();
+ }
+
+ getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
+ CLI.CS.getInstruction(), CLI.CallConv, ExtendKind);
+
+ for (unsigned j = 0; j != NumParts; ++j) {
+        // If it isn't the first piece, the alignment must be 1.
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
+ i < CLI.NumFixedArgs,
+ i, j*Parts[j].getValueType().getStoreSize());
+ if (NumParts > 1 && j == 0)
+ MyFlags.Flags.setSplit();
+ else if (j != 0) {
+ MyFlags.Flags.setOrigAlign(Align::None());
+ if (j == NumParts - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
+
+ CLI.Outs.push_back(MyFlags);
+ CLI.OutVals.push_back(Parts[j]);
+ }
+
+ if (NeedsRegBlock && Value == NumValues - 1)
+ CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
+ }
+ }
+
+ SmallVector<SDValue, 4> InVals;
+ CLI.Chain = LowerCall(CLI, InVals);
+
+ // Update CLI.InVals to use outside of this function.
+ CLI.InVals = InVals;
+
+ // Verify that the target's LowerCall behaved as expected.
+ assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
+ "LowerCall didn't return a valid chain!");
+ assert((!CLI.IsTailCall || InVals.empty()) &&
+ "LowerCall emitted a return value for a tail call!");
+ assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
+ "LowerCall didn't emit the correct number of values!");
+
+ // For a tail call, the return value is merely live-out and there aren't
+ // any nodes in the DAG representing it. Return a special value to
+ // indicate that a tail call has been emitted and no more Instructions
+ // should be processed in the current block.
+ if (CLI.IsTailCall) {
+ CLI.DAG.setRoot(CLI.Chain);
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() && "LowerCall emitted a null value!");
+ assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerCall emitted a value with the wrong type!");
+ }
+#endif
+
+ SmallVector<SDValue, 4> ReturnValues;
+ if (!CanLowerReturn) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
+
+ ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+
+ unsigned NumValues = RetTys.size();
+ ReturnValues.resize(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
+ CLI.DAG.getConstant(Offsets[i], CLI.DL,
+ PtrVT), Flags);
+ SDValue L = CLI.DAG.getLoad(
+ RetTys[i], CLI.DL, CLI.Chain, Add,
+ MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
+ DemoteStackIdx, Offsets[i]),
+ /* Alignment = */ 1);
+ ReturnValues[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
+ } else {
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ Optional<ISD::NodeType> AssertOp;
+ if (CLI.RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (CLI.RetZExt)
+ AssertOp = ISD::AssertZext;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+
+ ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+ NumRegs, RegisterVT, VT, nullptr,
+ CLI.CallConv, AssertOp));
+ CurReg += NumRegs;
+ }
+
+    // For a function returning void, there is no return value. We can't
+    // create such a node, so we just return a null return value in that case;
+    // nothing will actually look at the value.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), CLI.Chain);
+ }
+
+ SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
+ CLI.DAG.getVTList(RetTys), ReturnValues);
+ return std::make_pair(Res, CLI.Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ if (SDValue Res = LowerOperation(SDValue(N, 0), DAG))
+ Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("LowerOperation not implemented for this target!");
+}
+
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+ SDValue Op = getNonRegisterValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // If this is an InlineAsm we have to match the registers required, not the
+ // notional registers required by the type.
+
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
+ None); // This is not an ABI copy.
+ SDValue Chain = DAG.getEntryNode();
+
+ ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
+ FuncInfo.PreferredExtendType.end())
+ ? ISD::ANY_EXTEND
+ : FuncInfo.PreferredExtendType[V];
+ RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. This includes arguments used by switches, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ if (FastISel)
+ return A->use_empty();
+
+ const BasicBlock &Entry = A->getParent()->front();
+ for (const User *U : A->users())
+ if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
+ return false; // Use not in entry block.
+
+ return true;
+}
+
+using ArgCopyElisionMapTy =
+ DenseMap<const Argument *,
+ std::pair<const AllocaInst *, const StoreInst *>>;
+
+/// Scan the entry block of the function in FuncInfo for arguments that look
+/// like copies into a local alloca. Record any copied arguments in
+/// ArgCopyElisionCandidates.
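+///
+/// For illustration (example IR, not from this file), the candidate pattern
+/// for an i64 argument %x is:
+///   %x.addr = alloca i64
+///   store i64 %x, i64* %x.addr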
+static void
+findArgumentCopyElisionCandidates(const DataLayout &DL,
+ FunctionLoweringInfo *FuncInfo,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
+ // Record the state of every static alloca used in the entry block. Argument
+ // allocas are all used in the entry block, so we need approximately as many
+ // entries as we have arguments.
+ enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
+ SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
+ unsigned NumArgs = FuncInfo->Fn->arg_size();
+ StaticAllocas.reserve(NumArgs * 2);
+
+ auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
+ if (!V)
+ return nullptr;
+ V = V->stripPointerCasts();
+ const auto *AI = dyn_cast<AllocaInst>(V);
+ if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
+ return nullptr;
+ auto Iter = StaticAllocas.insert({AI, Unknown});
+ return &Iter.first->second;
+ };
+
+ // Look for stores of arguments to static allocas. Look through bitcasts and
+ // GEPs to handle type coercions, as long as the alloca is fully initialized
+ // by the store. Any non-store use of an alloca escapes it and any subsequent
+ // unanalyzed store might write it.
+ // FIXME: Handle structs initialized with multiple stores.
+ for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
+ // Look for stores, and handle non-store uses conservatively.
+ const auto *SI = dyn_cast<StoreInst>(&I);
+ if (!SI) {
+ // We will look through cast uses, so ignore them completely.
+ if (I.isCast())
+ continue;
+ // Ignore debug info intrinsics, they don't escape or store to allocas.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ // This is an unknown instruction. Assume it escapes or writes to all
+ // static alloca operands.
+ for (const Use &U : I.operands()) {
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
+ *Info = StaticAllocaInfo::Clobbered;
+ }
+ continue;
+ }
+
+ // If the stored value is a static alloca, mark it as escaped.
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
+ *Info = StaticAllocaInfo::Clobbered;
+
+ // Check if the destination is a static alloca.
+ const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
+ StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
+ if (!Info)
+ continue;
+ const AllocaInst *AI = cast<AllocaInst>(Dst);
+
+ // Skip allocas that have been initialized or clobbered.
+ if (*Info != StaticAllocaInfo::Unknown)
+ continue;
+
+ // Check if the stored value is an argument, and that this store fully
+ // initializes the alloca. Don't elide copies from the same argument twice.
+ const Value *Val = SI->getValueOperand()->stripPointerCasts();
+ const auto *Arg = dyn_cast<Argument>(Val);
+ if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+ Arg->getType()->isEmptyTy() ||
+ DL.getTypeStoreSize(Arg->getType()) !=
+ DL.getTypeAllocSize(AI->getAllocatedType()) ||
+ ArgCopyElisionCandidates.count(Arg)) {
+ *Info = StaticAllocaInfo::Clobbered;
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
+ << '\n');
+
+ // Mark this alloca and store for argument copy elision.
+ *Info = StaticAllocaInfo::Elidable;
+ ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
+
+ // Stop scanning if we've seen all arguments. This will happen early in -O0
+ // builds, which is useful, because -O0 builds have large entry blocks and
+ // many allocas.
+ if (ArgCopyElisionCandidates.size() == NumArgs)
+ break;
+ }
+}
+
+/// Try to elide argument copies from memory into a local alloca. Succeeds if
+/// ArgVal is a load from a suitable fixed stack object.
+static void tryToElideArgumentCopy(
+ FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
+ DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
+ SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
+ SDValue ArgVal, bool &ArgHasUses) {
+ // Check if this is a load from a fixed stack object.
+ auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
+ if (!LNode)
+ return;
+ auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
+ if (!FINode)
+ return;
+
+ // Check that the fixed stack object is the right size and alignment.
+ // Look at the alignment that the user wrote on the alloca instead of looking
+ // at the stack object.
+ auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
+ assert(ArgCopyIter != ArgCopyElisionCandidates.end());
+ const AllocaInst *AI = ArgCopyIter->second.first;
+ int FixedIndex = FINode->getIndex();
+ int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+ int OldIndex = AllocaIndex;
+ MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+ if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
+ LLVM_DEBUG(
+ dbgs() << " argument copy elision failed due to bad fixed stack "
+ "object size\n");
+ return;
+ }
+ unsigned RequiredAlignment = AI->getAlignment();
+ if (!RequiredAlignment) {
+ RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+ AI->getAllocatedType());
+ }
+ if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+ LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
+ "greater than stack argument alignment ("
+ << RequiredAlignment << " vs "
+ << MFI.getObjectAlignment(FixedIndex) << ")\n");
+ return;
+ }
+
+ // Perform the elision. Delete the old stack object and replace its only use
+ // in the variable info map. Mark the stack object as mutable.
+ LLVM_DEBUG({
+ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
+ << " Replacing frame index " << OldIndex << " with " << FixedIndex
+ << '\n';
+ });
+ MFI.RemoveStackObject(OldIndex);
+ MFI.setIsImmutableObjectIndex(FixedIndex, false);
+ AllocaIndex = FixedIndex;
+ ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
+ Chains.push_back(ArgVal.getValue(1));
+
+ // Avoid emitting code for the store implementing the copy.
+ const StoreInst *SI = ArgCopyIter->second.second;
+ ElidedArgCopyInstrs.insert(SI);
+
+ // Check for uses of the argument again so that we can avoid exporting ArgVal
+  // if it isn't used by anything other than the store.
+ for (const Value *U : Arg.users()) {
+ if (U != SI) {
+ ArgHasUses = true;
+ break;
+ }
+ }
+}
+
+void SelectionDAGISel::LowerArguments(const Function &F) {
+ SelectionDAG &DAG = SDB->DAG;
+ SDLoc dl = SDB->getCurSDLoc();
+ const DataLayout &DL = DAG.getDataLayout();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ if (!FuncInfo->CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(),
+ F.getReturnType()->getPointerTo(
+ DAG.getDataLayout().getAllocaAddrSpace()),
+ ValueVTs);
+
+    // NOTE: Assuming that a pointer will never break down into more than one
+    // VT or more than one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
+ ISD::InputArg::NoArgIndex, 0);
+ Ins.push_back(RetArg);
+ }
+
+ // Look for stores of arguments to static allocas. Mark such arguments with a
+ // flag to ask the target to give us the memory location of that argument if
+ // available.
+ ArgCopyElisionMapTy ArgCopyElisionCandidates;
+ findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+
+ // Set up the incoming argument description vector.
+ for (const Argument &Arg : F.args()) {
+ unsigned ArgNo = Arg.getArgNo();
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+ bool isArgValueUsed = !Arg.use_empty();
+ unsigned PartBase = 0;
+ Type *FinalType = Arg.getType();
+ if (Arg.hasAttribute(Attribute::ByVal))
+ FinalType = Arg.getParamByValType();
+ bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
+ FinalType, F.getCallingConv(), F.isVarArg());
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ ISD::ArgFlagsTy Flags;
+
+      // Certain targets (such as MIPS) may have a different ABI alignment
+ // for a type depending on the context. Give the target a chance to
+ // specify the alignment it wants.
+ const Align OriginalAlignment(
+ TLI->getABIAlignmentForCallingConv(ArgTy, DL));
+
+ if (Arg.getType()->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(Arg.getType())->getAddressSpace());
+ }
+ if (Arg.hasAttribute(Attribute::ZExt))
+ Flags.setZExt();
+ if (Arg.hasAttribute(Attribute::SExt))
+ Flags.setSExt();
+ if (Arg.hasAttribute(Attribute::InReg)) {
+        // If we are using the vectorcall calling convention, a structure that
+        // is passed InReg is surely an HVA (Homogeneous Vector Aggregate).
+ if (F.getCallingConv() == CallingConv::X86_VectorCall &&
+ isa<StructType>(Arg.getType())) {
+          // Mark the first value of the structure as the HVA start.
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set InReg Flag
+ Flags.setInReg();
+ }
+ if (Arg.hasAttribute(Attribute::StructRet))
+ Flags.setSRet();
+ if (Arg.hasAttribute(Attribute::SwiftSelf))
+ Flags.setSwiftSelf();
+ if (Arg.hasAttribute(Attribute::SwiftError))
+ Flags.setSwiftError();
+ if (Arg.hasAttribute(Attribute::ByVal))
+ Flags.setByVal();
+ if (Arg.hasAttribute(Attribute::InAlloca)) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (F.getCallingConv() == CallingConv::X86_INTR) {
+        // The x86 interrupt calling convention passes the frame (first
+        // parameter) by value on the stack.
+ if (ArgNo == 0)
+ Flags.setByVal();
+ }
+ if (Flags.isByVal() || Flags.isInAlloca()) {
+ Type *ElementTy = Arg.getParamByValType();
+
+        // For ByVal, the size and alignment should be passed from the
+        // frontend. The backend will guess if this info is not there, but
+        // there are cases it cannot get right.
+ unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType());
+ Flags.setByValSize(FrameSize);
+
+ unsigned FrameAlign;
+ if (Arg.getParamAlignment())
+ FrameAlign = Arg.getParamAlignment();
+ else
+ FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
+ Flags.setByValAlign(Align(FrameAlign));
+ }
+ if (Arg.hasAttribute(Attribute::Nest))
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+ Flags.setOrigAlign(OriginalAlignment);
+ if (ArgCopyElisionCandidates.count(&Arg))
+ Flags.setCopyElisionCandidate();
+ if (Arg.hasAttribute(Attribute::Returned))
+ Flags.setReturned();
+
+ MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
+ unsigned NumRegs = TLI->getNumRegistersForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
+ ArgNo, PartBase+i*RegisterVT.getStoreSize());
+ if (NumRegs > 1 && i == 0)
+ MyFlags.Flags.setSplit();
+        // If it isn't the first piece, the alignment must be 1.
+ else if (i > 0) {
+ MyFlags.Flags.setOrigAlign(Align::None());
+ if (i == NumRegs - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
+ Ins.push_back(MyFlags);
+ }
+ if (NeedsRegBlock && Value == NumValues - 1)
+ Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
+ PartBase += VT.getStoreSize();
+ }
+ }
+
+ // Call the target to set up the argument values.
+ SmallVector<SDValue, 8> InVals;
+ SDValue NewRoot = TLI->LowerFormalArguments(
+ DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
+
+ // Verify that the target's LowerFormalArguments behaved as expected.
+ assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+ "LowerFormalArguments didn't return a valid chain!");
+ assert(InVals.size() == Ins.size() &&
+ "LowerFormalArguments didn't emit the correct number of values!");
+ LLVM_DEBUG({
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ }
+ });
+
+ // Update the DAG with the new chain value resulting from argument lowering.
+ DAG.setRoot(NewRoot);
+
+ // Set up the argument values.
+ unsigned i = 0;
+ if (!FuncInfo->CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(),
+ F.getReturnType()->getPointerTo(
+ DAG.getDataLayout().getAllocaAddrSpace()),
+ ValueVTs);
+ MVT VT = ValueVTs[0].getSimpleVT();
+ MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
+ Optional<ISD::NodeType> AssertOp = None;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
+ nullptr, F.getCallingConv(), AssertOp);
+
+ MachineFunction& MF = SDB->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ Register SRetReg =
+ RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
+ FuncInfo->DemoteRegister = SRetReg;
+ NewRoot =
+ SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ ++i;
+ }
+
+ SmallVector<SDValue, 4> Chains;
+ DenseMap<int, int> ArgCopyElisionFrameIndexMap;
+ for (const Argument &Arg : F.args()) {
+ SmallVector<SDValue, 4> ArgValues;
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ continue;
+
+ bool ArgHasUses = !Arg.use_empty();
+
+ // Elide the copying store if the target loaded this argument from a
+ // suitable fixed stack object.
+ if (Ins[i].Flags.isCopyElisionCandidate()) {
+ tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+ ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
+ InVals[i], ArgHasUses);
+ }
+
+    // If this argument is unused, remember its value; it is used to generate
+    // debugging information.
+ bool isSwiftErrorArg =
+ TLI->supportSwiftError() &&
+ Arg.hasAttribute(Attribute::SwiftError);
+ if (!ArgHasUses && !isSwiftErrorArg) {
+ SDB->setUnusedArgValue(&Arg, InVals[i]);
+
+ // Also remember any frame index for use in FastISel.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+ }
+
+ for (unsigned Val = 0; Val != NumValues; ++Val) {
+ EVT VT = ValueVTs[Val];
+ MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
+ F.getCallingConv(), VT);
+ unsigned NumParts = TLI->getNumRegistersForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
+
+      // Even an apparently 'unused' swifterror argument needs to be returned.
+      // So we generate a copy for it that can be used on return from the
+      // function.
+ if (ArgHasUses || isSwiftErrorArg) {
+ Optional<ISD::NodeType> AssertOp;
+ if (Arg.hasAttribute(Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (Arg.hasAttribute(Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+ PartVT, VT, nullptr,
+ F.getCallingConv(), AssertOp));
+ }
+
+ i += NumParts;
+ }
+
+ // We don't need to do anything else for unused arguments.
+ if (ArgValues.empty())
+ continue;
+
+ // Note down frame index.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+
+ SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
+ SDB->getCurSDLoc());
+
+ SDB->setValue(&Arg, Res);
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ // We want to associate the argument with the frame index, among
+ // involved operands, that correspond to the lowest address. The
+ // getCopyFromParts function, called earlier, is swapping the order of
+ // the operands to BUILD_PAIR depending on endianness. The result of
+ // that swapping is that the least significant bits of the argument will
+ // be in the first operand of the BUILD_PAIR node, and the most
+ // significant bits will be in the second operand.
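+      // E.g. for a hypothetical i64 argument lowered as two i32 loads on a
+      // 32-bit big-endian target, the most significant half (operand 1) is
+      // loaded from the lower address, so that operand's frame index is the
+      // one to record.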
+ unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
+ if (LoadSDNode *LNode =
+ dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+ }
+
+ // Analyses past this point are naive and don't expect an assertion.
+ if (Res.getOpcode() == ISD::AssertZext)
+ Res = Res.getOperand(0);
+
+ // Update the SwiftErrorVRegDefMap.
+ if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (Register::isVirtualRegister(Reg))
+ SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
+ Reg);
+ }
+
+    // If this argument is live outside of the entry block, insert a copy from
+    // wherever we got it to the vreg that other BBs will reference it as.
+ if (Res.getOpcode() == ISD::CopyFromReg) {
+ // If we can, though, try to skip creating an unnecessary vreg.
+ // FIXME: This isn't very clean... it would be nice to make this more
+ // general.
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (Register::isVirtualRegister(Reg)) {
+ FuncInfo->ValueMap[&Arg] = Reg;
+ continue;
+ }
+ }
+ if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(&Arg);
+ SDB->CopyToExportRegsIfNeeded(&Arg);
+ }
+ }
+
+ if (!Chains.empty()) {
+ Chains.push_back(NewRoot);
+ NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ }
+
+ DAG.setRoot(NewRoot);
+
+ assert(i == InVals.size() && "Argument register count mismatch!");
+
+ // If any argument copy elisions occurred and we have debug info, update the
+ // stale frame indices used in the dbg.declare variable info table.
+  MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo =
+      MF->getVariableDbgInfo();
+ if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
+ for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
+ auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+ if (I != ArgCopyElisionFrameIndexMap.end())
+ VI.Slot = I->second;
+ }
+ }
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ EmitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBBs for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const Instruction *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check PHI nodes in successors that expect a value to be available from this
+ // block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB).second)
+ continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (const PHINode &PN : SuccBB->phis()) {
+      // Ignore dead PHIs.
+ if (PN.use_empty())
+ continue;
+
+ // Skip empty types
+ if (PN.getType()->isEmptyTy())
+ continue;
+
+ unsigned Reg;
+ const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
+
+ if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo.CreateRegs(C);
+ CopyValueToVirtualRegister(C, RegOut);
+ }
+ Reg = RegOut;
+ } else {
+ DenseMap<const Value *, unsigned>::iterator I =
+ FuncInfo.ValueMap.find(PHIOp);
+ if (I != FuncInfo.ValueMap.end())
+ Reg = I->second;
+ else {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo.CreateRegs(PHIOp);
+ CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+      // Remember that this register needs to be added to the machine PHI
+      // node as the input for this MBB.
+ SmallVector<EVT, 4> ValueVTs;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ FuncInfo.PHINodesToUpdate.push_back(
+ std::make_pair(&*MBBI++, Reg + i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+
+ ConstantsOut.clear();
+}
+
+/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if
+/// SuccMBB is null.
+MachineBasicBlock *
+SelectionDAGBuilder::StackProtectorDescriptor::
+AddSuccessorMBB(const BasicBlock *BB,
+ MachineBasicBlock *ParentMBB,
+ bool IsLikely,
+ MachineBasicBlock *SuccMBB) {
+  // If SuccMBB has not been created yet, create it.
+ if (!SuccMBB) {
+ MachineFunction *MF = ParentMBB->getParent();
+ MachineFunction::iterator BBI(ParentMBB);
+ SuccMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++BBI, SuccMBB);
+ }
+ // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(
+ SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
+ return SuccMBB;
+}
+
+MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
+ MachineFunction::iterator I(MBB);
+ if (++I == FuncInfo.MF->end())
+ return nullptr;
+ return &*I;
+}
+
+/// During lowering new call nodes can be created (such as memset, etc.).
+/// Those will become new roots of the current DAG, but complications arise
+/// when they are tail calls. In such cases, the call lowering will update
+/// the root, but the builder still needs to know that a tail call has been
+/// lowered in order to avoid generating an additional return.
+void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
+  // If the node is non-null, it becomes the new root of the DAG; a null node
+  // indicates that the call was lowered as a tail call.
+ if (MaybeTC.getNode() != nullptr)
+ DAG.setRoot(MaybeTC);
+ else
+ HasTailCall = true;
+}
+
+void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB) {
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *NextMBB = nullptr;
+ MachineFunction::iterator BBI(W.MBB);
+ if (++BBI != FuncInfo.MF->end())
+ NextMBB = &*BBI;
+
+ unsigned Size = W.LastCluster - W.FirstCluster + 1;
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+
+ if (Size == 2 && W.MBB == SwitchMBB) {
+    // If two of the cases have the same destination, and if the two case
+    // values differ in exactly one bit, use bit manipulation to do both
+    // compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3
+ // cases.
+ // TODO: Handle cases where W.CaseBB != SwitchBB.
+ CaseCluster &Small = *W.FirstCluster;
+ CaseCluster &Big = *W.LastCluster;
+
+ if (Small.Low == Small.High && Big.Low == Big.High &&
+ Small.MBB == Big.MBB) {
+ const APInt &SmallValue = Small.Low->getValue();
+ const APInt &BigValue = Big.Low->getValue();
+
+ // Check that there is only one bit different.
+ APInt CommonBit = BigValue ^ SmallValue;
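+        // For the example above: 6 ^ 4 == 2, a power of two, so X is mapped
+        // through (X | 2) and compared against 6 | 4 == 6.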
+ if (CommonBit.isPowerOf2()) {
+ SDValue CondLHS = getValue(Cond);
+ EVT VT = CondLHS.getValueType();
+ SDLoc DL = getCurSDLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, DL, VT));
+ SDValue Cond = DAG.getSetCC(
+ DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+          // Both Small and Big will jump to Small.MBB, so we sum up the
+ // probabilities.
+ addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
+ if (BPI)
+ addSuccessorWithProb(
+ SwitchMBB, DefaultMBB,
+ // The default destination is the first successor in IR.
+ BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
+ else
+ addSuccessorWithProb(SwitchMBB, DefaultMBB);
+
+ // Insert the true branch.
+ SDValue BrCond =
+ DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.MBB));
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(DefaultMBB));
+
+ DAG.setRoot(BrCond);
+ return;
+ }
+ }
+ }
+
+ if (TM.getOptLevel() != CodeGenOpt::None) {
+ // Here, we order cases by probability so the most likely case will be
+ // checked first. However, two clusters can have the same probability in
+ // which case their relative ordering is non-deterministic. So we use Low
+ // as a tie-breaker as clusters are guaranteed to never overlap.
+ llvm::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Prob != b.Prob ?
+ a.Prob > b.Prob :
+ a.Low->getValue().slt(b.Low->getValue());
+ });
+
+ // Rearrange the case blocks so that the last one falls through if possible
+ // without changing the order of probabilities.
+ for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
+ --I;
+ if (I->Prob > W.LastCluster->Prob)
+ break;
+ if (I->Kind == CC_Range && I->MBB == NextMBB) {
+ std::swap(*I, *W.LastCluster);
+ break;
+ }
+ }
+ }
+
+ // Compute total probability.
+ BranchProbability DefaultProb = W.DefaultProb;
+ BranchProbability UnhandledProbs = DefaultProb;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+ UnhandledProbs += I->Prob;
+
+ MachineBasicBlock *CurMBB = W.MBB;
+ for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ bool FallthroughUnreachable = false;
+ MachineBasicBlock *Fallthrough;
+ if (I == W.LastCluster) {
+ // For the last cluster, fall through to the default destination.
+ Fallthrough = DefaultMBB;
+ FallthroughUnreachable = isa<UnreachableInst>(
+ DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
+ } else {
+ Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+ CurMF->insert(BBI, Fallthrough);
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+ UnhandledProbs -= I->Prob;
+
+ switch (I->Kind) {
+ case CC_JumpTable: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
+ SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
+
+ // The jump block hasn't been inserted yet; insert it here.
+ MachineBasicBlock *JumpMBB = JT->MBB;
+ CurMF->insert(BBI, JumpMBB);
+
+ auto JumpProb = I->Prob;
+ auto FallthroughProb = UnhandledProbs;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default probability to successors of CurMBB. Also
+ // update the probability on the edge from JumpMBB to Fallthrough.
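+      // E.g. with a default probability of 1/4, the jump edge gains 1/8, the
+      // fallthrough edge gives up 1/8, and the edge from JumpMBB to the
+      // default block is set to 1/8.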
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpProb += DefaultProb / 2;
+ FallthroughProb -= DefaultProb / 2;
+ JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+ JumpMBB->normalizeSuccProbs();
+ break;
+ }
+ }
+
+ if (FallthroughUnreachable) {
+ // Skip the range check if the fallthrough block is unreachable.
+ JTH->OmitRangeCheck = true;
+ }
+
+ if (!JTH->OmitRangeCheck)
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+ CurMBB->normalizeSuccProbs();
+
+ // The jump table header will be inserted in our current block, do the
+ // range check, and fall through to our fallthrough block.
+ JTH->HeaderBB = CurMBB;
+ JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+ // If we're in the right place, emit the jump table header right now.
+ if (CurMBB == SwitchMBB) {
+ visitJumpTableHeader(*JT, *JTH, SwitchMBB);
+ JTH->Emitted = true;
+ }
+ break;
+ }
+ case CC_BitTests: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
+
+ // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases)
+ CurMF->insert(BBI, BTC.ThisBB);
+
+ // Fill in fields of the BitTestBlock.
+ BTB->Parent = CurMBB;
+ BTB->Default = Fallthrough;
+
+ BTB->DefaultProb = UnhandledProbs;
+      // If the cases in the bit test don't form a contiguous range, we
+      // evenly distribute the probability of the edge to Fallthrough between
+      // the two successors of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ if (FallthroughUnreachable) {
+ // Skip the range check if the fallthrough block is unreachable.
+ BTB->OmitRangeCheck = true;
+ }
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
+ visitBitTestHeader(*BTB, SwitchMBB);
+ BTB->Emitted = true;
+ }
+ break;
+ }
+ case CC_Range: {
+ const Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->Low == I->High) {
+ // Check Cond == I->Low.
+ CC = ISD::SETEQ;
+ LHS = Cond;
+        RHS = I->Low;
+ MHS = nullptr;
+ } else {
+ // Check I->Low <= Cond <= I->High.
+ CC = ISD::SETLE;
+ LHS = I->Low;
+ MHS = Cond;
+ RHS = I->High;
+ }
+
+ // If Fallthrough is unreachable, fold away the comparison.
+ if (FallthroughUnreachable)
+ CC = ISD::SETTRUE;
+
+ // The false probability is the sum of all unhandled cases.
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
+ getCurSDLoc(), I->Prob, UnhandledProbs);
+
+ if (CurMBB == SwitchMBB)
+ visitSwitchCase(CB, SwitchMBB);
+ else
+ SL->SwitchCases.push_back(CB);
+
+ break;
+ }
+ }
+ CurMBB = Fallthrough;
+ }
+}
+
+unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
+ CaseClusterIt First,
+ CaseClusterIt Last) {
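+  // Equivalently, the rank is the number of clusters in [First, Last] that
+  // sort strictly ahead of CC; a rank of 0 means CC outranks them all.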
+ return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
+ if (X.Prob != CC.Prob)
+ return X.Prob > CC.Prob;
+
+ // Ties are broken by comparing the case value.
+ return X.Low->getValue().slt(CC.Low->getValue());
+ });
+}
+
+void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
+ const SwitchWorkListItem &W,
+ Value *Cond,
+ MachineBasicBlock *SwitchMBB) {
+ assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
+ "Clusters not sorted?");
+
+ assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
+
+ // Balance the tree based on branch probabilities to create a near-optimal (in
+ // terms of search time given key frequency) binary search tree. See e.g. Kurt
+ // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
+ CaseClusterIt LastLeft = W.FirstCluster;
+ CaseClusterIt FirstRight = W.LastCluster;
+ auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
+ auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
+
+ // Move LastLeft and FirstRight towards each other from opposite directions to
+ // find a partitioning of the clusters which balances the probability on both
+ // sides. If LeftProb and RightProb are equal, alternate which side is
+ // taken to ensure 0-probability nodes are distributed evenly.
+ unsigned I = 0;
+ while (LastLeft + 1 < FirstRight) {
+ if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
+ LeftProb += (++LastLeft)->Prob;
+ else
+ RightProb += (--FirstRight)->Prob;
+ I++;
+ }
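+
+  // E.g. for cluster probabilities {1/2, 1/4, 1/8, 1/8} and a zero default
+  // probability, the loop above settles on the split {1/2} vs. {1/4, 1/8,
+  // 1/8}, leaving roughly half of the probability mass on each side.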
+
+ while (true) {
+ // Our binary search tree differs from a typical BST in that ours can have up
+ // to three values in each leaf. The pivot selection above doesn't take that
+ // into account, which means the tree might require more nodes and be less
+ // efficient. We compensate for this here.
+
+ unsigned NumLeft = LastLeft - W.FirstCluster + 1;
+ unsigned NumRight = W.LastCluster - FirstRight + 1;
+
+ if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
+ // If one side has less than 3 clusters, and the other has more than 3,
+ // consider taking a cluster from the other side.
+
+ if (NumLeft < NumRight) {
+ // Consider moving the first cluster on the right to the left side.
+ CaseCluster &CC = *FirstRight;
+ unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+ unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+ if (LeftSideRank <= RightSideRank) {
+ // Moving the cluster to the left does not demote it.
+ ++LastLeft;
+ ++FirstRight;
+ continue;
+ }
+ } else {
+ assert(NumRight < NumLeft);
+ // Consider moving the last element on the left to the right side.
+ CaseCluster &CC = *LastLeft;
+ unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+ unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+ if (RightSideRank <= LeftSideRank) {
+          // Moving the cluster to the right does not demote it.
+ --LastLeft;
+ --FirstRight;
+ continue;
+ }
+ }
+ }
+ break;
+ }
+
+ assert(LastLeft + 1 == FirstRight);
+ assert(LastLeft >= W.FirstCluster);
+ assert(FirstRight <= W.LastCluster);
+
+ // Use the first element on the right as pivot since we will make less-than
+ // comparisons against it.
+ CaseClusterIt PivotCluster = FirstRight;
+ assert(PivotCluster > W.FirstCluster);
+ assert(PivotCluster <= W.LastCluster);
+
+ CaseClusterIt FirstLeft = W.FirstCluster;
+ CaseClusterIt LastRight = W.LastCluster;
+
+ const ConstantInt *Pivot = PivotCluster->Low;
+
+ // New blocks will be inserted immediately after the current one.
+ MachineFunction::iterator BBI(W.MBB);
+ ++BBI;
+
+ // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
+ // we can branch to its destination directly if it's squeezed exactly in
+ // between the known lower bound and Pivot - 1.
+ MachineBasicBlock *LeftMBB;
+ if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
+ FirstLeft->Low == W.GE &&
+ (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
+ LeftMBB = FirstLeft->MBB;
+ } else {
+ LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, LeftMBB);
+ WorkList.push_back(
+ {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
+ // single cluster, RHS.Low == Pivot, and we can branch to its destination
+ // directly if RHS.High equals the current upper bound.
+ MachineBasicBlock *RightMBB;
+ if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
+ W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
+ RightMBB = FirstRight->MBB;
+ } else {
+ RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, RightMBB);
+ WorkList.push_back(
+ {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ // Create the CaseBlock record that will be used to lower the branch.
+ CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
+ getCurSDLoc(), LeftProb, RightProb);
+
+ if (W.MBB == SwitchMBB)
+ visitSwitchCase(CB, SwitchMBB);
+ else
+ SL->SwitchCases.push_back(CB);
+}
+
+// Scale CaseProb after peeling a case with probability PeeledCaseProb from
+// the switch statement.
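+// E.g. peeling a case with probability 1/2 rescales a remaining case of
+// probability 1/4 to (1/4) / (1 - 1/2) = 1/2 within the peeled switch.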
+static BranchProbability scaleCaseProbability(BranchProbability CaseProb,
+                                              BranchProbability PeeledCaseProb) {
+ if (PeeledCaseProb == BranchProbability::getOne())
+ return BranchProbability::getZero();
+ BranchProbability SwitchProb = PeeledCaseProb.getCompl();
+
+ uint32_t Numerator = CaseProb.getNumerator();
+ uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
+ return BranchProbability(Numerator, std::max(Numerator, Denominator));
+}
+
+// Try to peel the top probability case if it exceeds the threshold.
+// Return the current MachineBasicBlock for the switch statement if peeling
+// does not occur.
+// If peeling is performed, return the newly created MachineBasicBlock for
+// the peeled switch statement, and update Clusters to remove the peeled
+// case. PeeledCaseProb is set to the BranchProbability of the peeled case.
+MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
+ const SwitchInst &SI, CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+  // Don't peel if there is only one cluster or when optimizing for size.
+ if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
+ TM.getOptLevel() == CodeGenOpt::None ||
+ SwitchMBB->getParent()->getFunction().hasMinSize())
+ return SwitchMBB;
+
+ BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
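+  // E.g. with a peel threshold of 66, only a case whose probability is at
+  // least 66/100 qualifies; the most probable qualifying case is peeled.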
+ unsigned PeeledCaseIndex = 0;
+ bool SwitchPeeled = false;
+ for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
+ CaseCluster &CC = Clusters[Index];
+ if (CC.Prob < TopCaseProb)
+ continue;
+ TopCaseProb = CC.Prob;
+ PeeledCaseIndex = Index;
+ SwitchPeeled = true;
+ }
+ if (!SwitchPeeled)
+ return SwitchMBB;
+
+ LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
+ << TopCaseProb << "\n");
+
+ // Record the MBB for the peeled switch statement.
+ MachineFunction::iterator BBI(SwitchMBB);
+ ++BBI;
+ MachineBasicBlock *PeeledSwitchMBB =
+ FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
+
+ ExportFromCurrentBlock(SI.getCondition());
+ auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
+ SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
+ nullptr, nullptr, TopCaseProb.getCompl()};
+ lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
+
+ Clusters.erase(PeeledCaseIt);
+ for (CaseCluster &CC : Clusters) {
+ LLVM_DEBUG(
+        dbgs() << "Scale the probability for one cluster, before scaling: "
+ << CC.Prob << "\n");
+    CC.Prob = scaleCaseProbability(CC.Prob, TopCaseProb);
+ LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+ }
+ PeeledCaseProb = TopCaseProb;
+ return PeeledSwitchMBB;
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+ // Extract cases from the switch.
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ CaseClusterVector Clusters;
+ Clusters.reserve(SI.getNumCases());
+ for (auto I : SI.cases()) {
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
+ const ConstantInt *CaseVal = I.getCaseValue();
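+    // Without BPI, assume each of the NumCases + 1 edges (the cases plus the
+    // default destination) is equally likely.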
+ BranchProbability Prob =
+ BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+ : BranchProbability(1, SI.getNumCases() + 1);
+ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
+ }
+
+ MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // Cluster adjacent cases with the same destination. We do this at all
+ // optimization levels because it's cheap to do and will make codegen faster
+ // if there are many clusters.
+ sortAndRangeify(Clusters);
+
+  // The branch probability of the peeled case.
+ BranchProbability PeeledCaseProb = BranchProbability::getZero();
+ MachineBasicBlock *PeeledSwitchMBB =
+ peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
+
+ // If there is only the default destination, jump there directly.
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+ if (Clusters.empty()) {
+ assert(PeeledSwitchMBB == SwitchMBB);
+ SwitchMBB->addSuccessor(DefaultMBB);
+ if (DefaultMBB != NextBlock(SwitchMBB)) {
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
+ }
+ return;
+ }
+
+ SL->findJumpTables(Clusters, &SI, DefaultMBB);
+ SL->findBitTestClusters(Clusters, &SI);
+
+ LLVM_DEBUG({
+ dbgs() << "Case clusters: ";
+ for (const CaseCluster &C : Clusters) {
+ if (C.Kind == CC_JumpTable)
+ dbgs() << "JT:";
+ if (C.Kind == CC_BitTests)
+ dbgs() << "BT:";
+
+ C.Low->getValue().print(dbgs(), true);
+ if (C.Low != C.High) {
+ dbgs() << '-';
+ C.High->getValue().print(dbgs(), true);
+ }
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+ });
+
+ assert(!Clusters.empty());
+ SwitchWorkList WorkList;
+ CaseClusterIt First = Clusters.begin();
+ CaseClusterIt Last = Clusters.end() - 1;
+ auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
+  // Scale the branch probability for DefaultMBB if peeling occurred and
+  // DefaultMBB is not replaced.
+ if (PeeledCaseProb != BranchProbability::getZero() &&
+ DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
+    DefaultProb = scaleCaseProbability(DefaultProb, PeeledCaseProb);
+ WorkList.push_back(
+ {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+
+ while (!WorkList.empty()) {
+ SwitchWorkListItem W = WorkList.back();
+ WorkList.pop_back();
+ unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
+
+ if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
+ !DefaultMBB->getParent()->getFunction().hasMinSize()) {
+ // For optimized builds, lower large range as a balanced binary tree.
+ splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
+ continue;
+ }
+
+ lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
+ }
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 0000000000000..bfcf30b430b6d
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,892 @@
+//===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+
+#include "StatepointLowering.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class AllocaInst;
+class AtomicCmpXchgInst;
+class AtomicRMWInst;
+class BasicBlock;
+class BranchInst;
+class CallInst;
+class CallBrInst;
+class CatchPadInst;
+class CatchReturnInst;
+class CatchSwitchInst;
+class CleanupPadInst;
+class CleanupReturnInst;
+class Constant;
+class ConstantInt;
+class ConstrainedFPIntrinsic;
+class DbgValueInst;
+class DataLayout;
+class DIExpression;
+class DILocalVariable;
+class DILocation;
+class FenceInst;
+class FunctionLoweringInfo;
+class GCFunctionInfo;
+class GCRelocateInst;
+class GCResultInst;
+class IndirectBrInst;
+class InvokeInst;
+class LandingPadInst;
+class LLVMContext;
+class LoadInst;
+class MachineBasicBlock;
+class PHINode;
+class ResumeInst;
+class ReturnInst;
+class SDDbgValue;
+class StoreInst;
+class SwiftErrorValueTracking;
+class SwitchInst;
+class TargetLibraryInfo;
+class TargetMachine;
+class Type;
+class VAArgInst;
+class UnreachableInst;
+class Use;
+class User;
+class Value;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+///
+class SelectionDAGBuilder {
+ /// The current instruction being visited.
+ const Instruction *CurInst = nullptr;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+  /// Maps argument values for unused arguments. This is used
+ /// to preserve debug information for incoming arguments.
+ DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+
+ /// Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ const DbgValueInst* DI = nullptr;
+ DebugLoc dl;
+ unsigned SDNodeOrder = 0;
+
+ public:
+ DanglingDebugInfo() = default;
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO)
+ : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {}
+
+ const DbgValueInst* getDI() { return DI; }
+ DebugLoc getdl() { return dl; }
+ unsigned getSDNodeOrder() { return SDNodeOrder; }
+ };
+
+ /// Helper type for DanglingDebugInfoMap.
+ typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector;
+
+ /// Keeps track of dbg_values for which we have not yet seen the referent.
+ /// We defer handling these until we do see it.
+ MapVector<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
+
+public:
+ /// Loads are not emitted to the program immediately. We bunch them up and
+ /// then emit token factor nodes when possible. This allows us to get simple
+ /// disambiguation between loads without worrying about alias analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+
+ /// State used while lowering a statepoint sequence (gc_statepoint,
+  /// gc_relocate, and gc_result). See StatepointLowering.h/.cpp for details.
+ StatepointLoweringState StatepointLowering;
+
+private:
+ /// CopyToReg nodes that copy values to virtual registers for export to other
+ /// blocks need to be emitted before any terminator instruction, but they have
+  /// no other ordering requirements. We bunch them up and then emit a single
+  /// TokenFactor for them just before terminator instructions.
+ SmallVector<SDValue, 8> PendingExports;
+
+ /// A unique monotonically increasing number used to order the SDNodes we
+ /// create.
+ unsigned SDNodeOrder;
+
+ /// Determine the rank by weight of CC in [First,Last]. If CC has more weight
+ /// than each cluster in the range, its rank is 0.
+ unsigned caseClusterRank(const SwitchCG::CaseCluster &CC,
+ SwitchCG::CaseClusterIt First,
+ SwitchCG::CaseClusterIt Last);
+
+ /// Emit comparison and split W into two subtrees.
+ void splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
+ const SwitchCG::SwitchWorkListItem &W, Value *Cond,
+ MachineBasicBlock *SwitchMBB);
+
+ /// Lower W.
+ void lowerWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB);
+
+ /// Peel the top probability case if it exceeds the threshold
+ MachineBasicBlock *
+ peelDominantCaseCluster(const SwitchInst &SI,
+ SwitchCG::CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb);
+
+ /// A class which encapsulates all of the information needed to generate a
+ /// stack protector check and signals to isel via its state being initialized
+ /// that a stack protector needs to be generated.
+ ///
+ /// *NOTE* The following is a high level documentation of SelectionDAG Stack
+ /// Protector Generation. The reason that it is placed here is for a lack of
+ /// other good places to stick it.
+ ///
+ /// High Level Overview of SelectionDAG Stack Protector Generation:
+ ///
+ /// Previously, generation of stack protectors was done exclusively in the
+ /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated
+ /// splitting basic blocks at the IR level to create the success/failure basic
+ /// blocks in the tail of the basic block in question. As a result of this,
+ /// calls that would have qualified for the sibling call optimization were no
+ /// longer eligible for optimization since said calls were no longer right in
+ /// the "tail position" (i.e. the immediate predecessor of a ReturnInst
+ /// instruction).
+ ///
+ /// Then it was noticed that since the sibling call optimization causes the
+ /// callee to reuse the caller's stack, if we could delay the generation of
+ /// the stack protector check until later in CodeGen after the sibling call
+ /// decision was made, we get both the tail call optimization and the stack
+ /// protector check!
+ ///
+ /// A few goals in solving this problem were:
+ ///
+ /// 1. Preserve the architecture independence of stack protector generation.
+ ///
+ /// 2. Preserve the normal IR level stack protector check for platforms like
+ /// OpenBSD for which we support platform-specific stack protector
+ /// generation.
+ ///
+ /// The main problem that guided the present solution is that one can not
+ /// solve this problem in an architecture independent manner at the IR level
+ /// only. This is because:
+ ///
+ /// 1. The decision on whether or not to perform a sibling call on certain
+ /// platforms (for instance i386) requires lower level information
+ /// related to available registers that can not be known at the IR level.
+ ///
+ /// 2. Even if the previous point were not true, the decision on whether to
+ /// perform a tail call is done in LowerCallTo in SelectionDAG which
+ /// occurs after the Stack Protector Pass. As a result, one would need to
+ /// put the relevant callinst into the stack protector check success
+ /// basic block (where the return inst is placed) and then move it back
+ /// later at SelectionDAG/MI time before the stack protector check if the
+ /// tail call optimization failed. The MI level option was nixed
+ /// immediately since it would require platform-specific pattern
+ /// matching. The SelectionDAG level option was nixed because
+ /// SelectionDAG only processes one IR level basic block at a time
+ /// implying one could not create a DAG Combine to move the callinst.
+ ///
+ /// To get around this problem a few things were realized:
+ ///
+ /// 1. While one can not handle multiple IR level basic blocks at the
+ /// SelectionDAG Level, one can generate multiple machine basic blocks
+ /// for one IR level basic block. This is how we handle bit tests and
+ /// switches.
+ ///
+ /// 2. At the MI level, tail calls are represented via a special return
+ /// MIInst called "tcreturn". Thus if we know the basic block in which we
+ /// wish to insert the stack protector check, we get the correct behavior
+ /// by always inserting the stack protector check right before the return
+ /// statement. This is a "magical transformation" since no matter where
+ /// the stack protector check intrinsic is, we always insert the stack
+ /// protector check code at the end of the BB.
+ ///
+ /// Given the aforementioned constraints, the following solution was devised:
+ ///
+ /// 1. On platforms that do not support SelectionDAG stack protector check
+ /// generation, allow for the normal IR level stack protector check
+ /// generation to continue.
+ ///
+ /// 2. On platforms that do support SelectionDAG stack protector check
+ /// generation:
+ ///
+ /// a. Use the IR level stack protector pass to decide if a stack
+ /// protector is required/which BB we insert the stack protector check
+ /// in by reusing the logic already therein. If we wish to generate a
+ /// stack protector check in a basic block, we place a special IR
+ /// intrinsic called llvm.stackprotectorcheck right before the BB's
+ /// returninst or if there is a callinst that could potentially be
+ /// sibling call optimized, before the call inst.
+ ///
+ /// b. Then when a BB with said intrinsic is processed, we codegen the BB
+ /// normally via SelectBasicBlock. In said process, when we visit the
+ /// stack protector check, we do not actually emit anything into the
+ /// BB. Instead, we just initialize the stack protector descriptor
+ /// class (which involves stashing information/creating the success
+  ///     mbb and the failure mbb if we have not created one for this
+ /// function yet) and export the guard variable that we are going to
+ /// compare.
+ ///
+ /// c. After we finish selecting the basic block, in FinishBasicBlock if
+ /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is
+ /// initialized, we produce the validation code with one of these
+ /// techniques:
+ /// 1) with a call to a guard check function
+ /// 2) with inlined instrumentation
+ ///
+ /// 1) We insert a call to the check function before the terminator.
+ ///
+ /// 2) We first find a splice point in the parent basic block
+ /// before the terminator and then splice the terminator of said basic
+ /// block into the success basic block. Then we code-gen a new tail for
+ /// the parent basic block consisting of the two loads, the comparison,
+ /// and finally two branches to the success/failure basic blocks. We
+ /// conclude by code-gening the failure basic block if we have not
+ /// code-gened it already (all stack protector checks we generate in
+ /// the same function, use the same failure basic block).
+ class StackProtectorDescriptor {
+ public:
+ StackProtectorDescriptor() = default;
+
+ /// Returns true if all fields of the stack protector descriptor are
+ /// initialized implying that we should/are ready to emit a stack protector.
+ bool shouldEmitStackProtector() const {
+ return ParentMBB && SuccessMBB && FailureMBB;
+ }
+
+ bool shouldEmitFunctionBasedCheckStackProtector() const {
+ return ParentMBB && !SuccessMBB && !FailureMBB;
+ }
+
+ /// Initialize the stack protector descriptor structure for a new basic
+ /// block.
+ void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
+ bool FunctionBasedInstrumentation) {
+ // Make sure we are not initialized yet.
+ assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
+ "already initialized!");
+ ParentMBB = MBB;
+ if (!FunctionBasedInstrumentation) {
+ SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
+ FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
+ }
+ }
+
+ /// Reset state that changes when we handle different basic blocks.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. The specific basic block we are generating a
+ /// stack protector for (ParentMBB).
+ ///
+ /// 2. The successor machine basic block that will contain the tail of
+ /// parent mbb after we create the stack protector check (SuccessMBB). This
+ /// BB is visited only on stack protector check success.
+ void resetPerBBState() {
+ ParentMBB = nullptr;
+ SuccessMBB = nullptr;
+ }
+
+ /// Reset state that only changes when we switch functions.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. FailureMBB since we reuse the failure code path for all stack
+ /// protector checks created in an individual function.
+ ///
+  /// 2. The guard variable since the guard variable we are checking against is
+ /// always the same.
+ void resetPerFunctionState() {
+ FailureMBB = nullptr;
+ }
+
+ MachineBasicBlock *getParentMBB() { return ParentMBB; }
+ MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
+ MachineBasicBlock *getFailureMBB() { return FailureMBB; }
+
+ private:
+ /// The basic block for which we are generating the stack protector.
+ ///
+ /// As a result of stack protector generation, we will splice the
+ /// terminators of this basic block into the successor mbb SuccessMBB and
+ /// replace it with a compare/branch to the successor mbbs
+ /// SuccessMBB/FailureMBB depending on whether or not the stack protector
+ /// was violated.
+ MachineBasicBlock *ParentMBB = nullptr;
+
+ /// A basic block visited on stack protector check success that contains the
+ /// terminators of ParentMBB.
+ MachineBasicBlock *SuccessMBB = nullptr;
+
+    /// The basic block visited on stack protector check failure; it will
+    /// contain a call to __stack_chk_fail().
+ MachineBasicBlock *FailureMBB = nullptr;
+
+    /// Add a successor machine basic block to ParentMBB. If the successor mbb
+    /// has not been created yet (i.e. if SuccMBB is null), then the machine
+    /// basic block will be created. Assign a large weight if IsLikely is true.
+ MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
+ MachineBasicBlock *ParentMBB,
+ bool IsLikely,
+ MachineBasicBlock *SuccMBB = nullptr);
+ };
+
+private:
+ const TargetMachine &TM;
+
+public:
+ /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling
+ /// nodes without a corresponding SDNode.
+ static const unsigned LowestSDNodeOrder = 1;
+
+ SelectionDAG &DAG;
+ const DataLayout *DL = nullptr;
+ AliasAnalysis *AA = nullptr;
+ const TargetLibraryInfo *LibInfo;
+
+ class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
+ public:
+ SDAGSwitchLowering(SelectionDAGBuilder *sdb, FunctionLoweringInfo &funcinfo)
+ : SwitchCG::SwitchLowering(funcinfo), SDB(sdb) {}
+
+ virtual void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown()) override {
+ SDB->addSuccessorWithProb(Src, Dst, Prob);
+ }
+
+ private:
+ SelectionDAGBuilder *SDB;
+ };
+
+ std::unique_ptr<SDAGSwitchLowering> SL;
+
+ /// A StackProtectorDescriptor structure used to communicate stack protector
+ /// information in between SelectBasicBlock and FinishBasicBlock.
+ StackProtectorDescriptor SPDescriptor;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<const Constant *, unsigned> ConstantsOut;
+
+ /// Information about the function as a whole.
+ FunctionLoweringInfo &FuncInfo;
+
+ /// Information about the swifterror values used throughout the function.
+ SwiftErrorValueTracking &SwiftError;
+
+ /// Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ /// Map a landing pad to the call site indexes.
+ DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
+
+ /// This is set to true if a call in the current block has been translated as
+ /// a tail call. In this case, no subsequent DAG nodes should be created.
+ bool HasTailCall = false;
+
+ LLVMContext *Context;
+
+ SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
+ SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
+ : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
+ SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
+ SwiftError(swifterror) {}
+
+ void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
+ const TargetLibraryInfo *li);
+
+ /// Clear out the current SelectionDAG and the associated state and prepare
+ /// this SelectionDAGBuilder object to be used for a new block. This doesn't
+ /// clear out information about additional blocks that are needed to complete
+ /// switch lowering or PHI node updating; that information is cleared out as
+ /// it is consumed.
+ void clear();
+
+  /// Clear the dangling debug information map. This function is separated
+  /// from clear() so that debug information that is dangling in a basic block
+  /// can be properly resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached to PHI nodes.
+ void clearDanglingDebugInfo();
+
+ /// Return the current virtual root of the Selection DAG, flushing any
+ /// PendingLoad items. This must be done before emitting a store or any other
+ /// node that may need to be ordered after any prior load instructions.
+ SDValue getRoot();
+
+ /// Similar to getRoot, but instead of flushing all the PendingLoad items,
+ /// flush all the PendingExports items. It is necessary to do this before
+ /// emitting a terminator instruction.
+ SDValue getControlRoot();
+
+ SDLoc getCurSDLoc() const {
+ return SDLoc(CurInst, SDNodeOrder);
+ }
+
+ DebugLoc getCurDebugLoc() const {
+ return CurInst ? CurInst->getDebugLoc() : DebugLoc();
+ }
+
+ void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
+
+ void visit(const Instruction &I);
+
+ void visit(unsigned Opcode, const User &I);
+
+  /// If there was a virtual register allocated for the value V, emit a
+  /// CopyFromReg of the specified type Ty. Return an empty SDValue()
+  /// otherwise.
+ SDValue getCopyFromRegs(const Value *V, Type *Ty);
+
+ /// If we have dangling debug info that describes \p Variable, or an
+  /// overlapping part of the variable considering the \p Expr, then this
+  /// method will drop that debug info as it isn't valid any longer.
+ void dropDanglingDebugInfo(const DILocalVariable *Variable,
+ const DIExpression *Expr);
+
+ /// If we saw an earlier dbg_value referring to V, generate the debug data
+ /// structures now that we've seen its definition.
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+
+ /// For the given dangling debuginfo record, perform last-ditch efforts to
+ /// resolve the debuginfo to something that is represented in this DAG. If
+ /// this cannot be done, produce an Undef debug value record.
+ void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI);
+
+  /// For a given Value, attempt to create and record an SDDbgValue in the
+ /// SelectionDAG.
+ bool handleDebugValue(const Value *V, DILocalVariable *Var,
+ DIExpression *Expr, DebugLoc CurDL,
+ DebugLoc InstDL, unsigned Order);
+
+ /// Evict any dangling debug information, attempting to salvage it first.
+ void resolveOrClearDbgInfo();
+
+ SDValue getValue(const Value *V);
+ bool findValue(const Value *V) const;
+
+ /// Return the SDNode for the specified IR value if it exists.
+ SDNode *getNodeForIRValue(const Value *V) {
+    auto It = NodeMap.find(V);
+    if (It == NodeMap.end())
+      return nullptr;
+    return It->second.getNode();
+ }
+
+ SDValue getNonRegisterValue(const Value *V);
+ SDValue getValueImpl(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(!N.getNode() && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void setUnusedArgValue(const Value *V, SDValue NewN) {
+ SDValue &N = UnusedArgNodeMap[V];
+ assert(!N.getNode() && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond);
+ void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ BranchProbability TProb, BranchProbability FProb,
+ bool InvertCond);
+ bool ShouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(const Value *V);
+ void ExportFromCurrentBlock(const Value *V);
+ void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+ const BasicBlock *EHPadBB = nullptr);
+
+  // Lower range metadata from 0 to N to an assert zext to an integer whose
+  // width is the nearest floor power of two.
+ SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
+ SDValue Op);
+
+ void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
+ const CallBase *Call, unsigned ArgIdx,
+ unsigned NumArgs, SDValue Callee,
+ Type *ReturnTy, bool IsPatchPoint);
+
+ std::pair<SDValue, SDValue>
+ lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB = nullptr);
+
+ /// When an MBB was split during scheduling, update the
+ /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
+  /// Describes a gc.statepoint or a gc.statepoint-like thing for the purposes
+ /// of lowering into a STATEPOINT node.
+ struct StatepointLoweringInfo {
+ /// Bases[i] is the base pointer for Ptrs[i]. Together they denote the set
+ /// of gc pointers this STATEPOINT has to relocate.
+ SmallVector<const Value *, 16> Bases;
+ SmallVector<const Value *, 16> Ptrs;
+
+ /// The set of gc.relocate calls associated with this gc.statepoint.
+ SmallVector<const GCRelocateInst *, 16> GCRelocates;
+
+ /// The full list of gc arguments to the gc.statepoint being lowered.
+ ArrayRef<const Use> GCArgs;
+
+ /// The gc.statepoint instruction.
+ const Instruction *StatepointInstr = nullptr;
+
+ /// The list of gc transition arguments present in the gc.statepoint being
+ /// lowered.
+ ArrayRef<const Use> GCTransitionArgs;
+
+ /// The ID that the resulting STATEPOINT instruction has to report.
+ unsigned ID = -1;
+
+ /// Information regarding the underlying call instruction.
+ TargetLowering::CallLoweringInfo CLI;
+
+ /// The deoptimization state associated with this gc.statepoint call, if
+ /// any.
+ ArrayRef<const Use> DeoptState;
+
+ /// Flags associated with the meta arguments being lowered.
+ uint64_t StatepointFlags = -1;
+
+ /// The number of patchable bytes the call needs to get lowered into.
+ unsigned NumPatchBytes = -1;
+
+ /// The exception handling unwind destination, in case this represents an
+ /// invoke of gc.statepoint.
+ const BasicBlock *EHPadBB = nullptr;
+
+ explicit StatepointLoweringInfo(SelectionDAG &DAG) : CLI(DAG) {}
+ };
+
+ /// Lower \p SLI into a STATEPOINT instruction.
+ SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SI);
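+
+ // A sketch of how a caller might populate the struct before this call (the
+ // surrounding values -- StatepointID, Call, BasePtr, DerivedPtr -- are
+ // assumed to come from the statepoint being lowered):
+ //   StatepointLoweringInfo SLI(DAG);
+ //   SLI.ID = StatepointID;
+ //   SLI.NumPatchBytes = 0;
+ //   SLI.StatepointInstr = &Call;
+ //   SLI.Bases.push_back(BasePtr);
+ //   SLI.Ptrs.push_back(DerivedPtr);
+ //   SDValue ReturnValue = LowerAsSTATEPOINT(SLI);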
+
+ // This function is responsible for the whole statepoint lowering process.
+ // It uniformly handles invoke and call statepoints.
+ void LowerStatepoint(ImmutableStatepoint ISP,
+ const BasicBlock *EHPadBB = nullptr);
+
+ void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee,
+ const BasicBlock *EHPadBB);
+
+ void LowerDeoptimizeCall(const CallInst *CI);
+ void LowerDeoptimizingReturn();
+
+ void LowerCallSiteWithDeoptBundleImpl(const CallBase *Call, SDValue Callee,
+ const BasicBlock *EHPadBB,
+ bool VarArgDisallowed,
+ bool ForceVoidReturnTy);
+
+ /// Returns the type of FrameIndex and TargetFrameIndex nodes.
+ MVT getFrameIndexTy() {
+ return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout());
+ }
+
+private:
+ // Terminator instructions.
+ void visitRet(const ReturnInst &I);
+ void visitBr(const BranchInst &I);
+ void visitSwitch(const SwitchInst &I);
+ void visitIndirectBr(const IndirectBrInst &I);
+ void visitUnreachable(const UnreachableInst &I);
+ void visitCleanupRet(const CleanupReturnInst &I);
+ void visitCatchSwitch(const CatchSwitchInst &I);
+ void visitCatchRet(const CatchReturnInst &I);
+ void visitCatchPad(const CatchPadInst &I);
+ void visitCleanupPad(const CleanupPadInst &CPI);
+
+ BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+ void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown());
+
+public:
+ void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB);
+ void visitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB);
+ void visitSPDescriptorFailure(StackProtectorDescriptor &SPD);
+ void visitBitTestHeader(SwitchCG::BitTestBlock &B,
+ MachineBasicBlock *SwitchBB);
+ void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB,
+ BranchProbability BranchProbToNext, unsigned Reg,
+ SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
+ void visitJumpTable(SwitchCG::JumpTable &JT);
+ void visitJumpTableHeader(SwitchCG::JumpTable &JT,
+ SwitchCG::JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(const InvokeInst &I);
+ void visitCallBr(const CallBrInst &I);
+ void visitResume(const ResumeInst &I);
+
+ void visitUnary(const User &I, unsigned Opcode);
+ void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
+
+ void visitBinary(const User &I, unsigned Opcode);
+ void visitShift(const User &I, unsigned Opcode);
+ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(const User &I);
+ void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+ void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(const User &I);
+ void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (const User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(const User &I);
+ void visitFCmp(const User &I);
+ // Visit the conversion instructions
+ void visitTrunc(const User &I);
+ void visitZExt(const User &I);
+ void visitSExt(const User &I);
+ void visitFPTrunc(const User &I);
+ void visitFPExt(const User &I);
+ void visitFPToUI(const User &I);
+ void visitFPToSI(const User &I);
+ void visitUIToFP(const User &I);
+ void visitSIToFP(const User &I);
+ void visitPtrToInt(const User &I);
+ void visitIntToPtr(const User &I);
+ void visitBitCast(const User &I);
+ void visitAddrSpaceCast(const User &I);
+
+ void visitExtractElement(const User &I);
+ void visitInsertElement(const User &I);
+ void visitShuffleVector(const User &I);
+
+ void visitExtractValue(const User &I);
+ void visitInsertValue(const User &I);
+ void visitLandingPad(const LandingPadInst &LP);
+
+ void visitGetElementPtr(const User &I);
+ void visitSelect(const User &I);
+
+ void visitAlloca(const AllocaInst &I);
+ void visitLoad(const LoadInst &I);
+ void visitStore(const StoreInst &I);
+ void visitMaskedLoad(const CallInst &I, bool IsExpanding = false);
+ void visitMaskedStore(const CallInst &I, bool IsCompressing = false);
+ void visitMaskedGather(const CallInst &I);
+ void visitMaskedScatter(const CallInst &I);
+ void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
+ void visitAtomicRMW(const AtomicRMWInst &I);
+ void visitFence(const FenceInst &I);
+ void visitPHI(const PHINode &I);
+ void visitCall(const CallInst &I);
+ bool visitMemCmpCall(const CallInst &I);
+ bool visitMemPCpyCall(const CallInst &I);
+ bool visitMemChrCall(const CallInst &I);
+ bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
+ bool visitStrCmpCall(const CallInst &I);
+ bool visitStrLenCall(const CallInst &I);
+ bool visitStrNLenCall(const CallInst &I);
+ bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
+ bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode);
+ void visitAtomicLoad(const LoadInst &I);
+ void visitAtomicStore(const StoreInst &I);
+ void visitLoadFromSwiftError(const LoadInst &I);
+ void visitStoreToSwiftError(const StoreInst &I);
+
+ void visitInlineAsm(ImmutableCallSite CS);
+ void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+ void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+
+ void visitVAStart(const CallInst &I);
+ void visitVAArg(const VAArgInst &I);
+ void visitVAEnd(const CallInst &I);
+ void visitVACopy(const CallInst &I);
+ void visitStackmap(const CallInst &I);
+ void visitPatchpoint(ImmutableCallSite CS,
+ const BasicBlock *EHPadBB = nullptr);
+
+ // These two are implemented in StatepointLowering.cpp
+ void visitGCRelocate(const GCRelocateInst &Relocate);
+ void visitGCResult(const GCResultInst &I);
+
+ void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
+
+ void visitUserOp1(const Instruction &I) {
+ llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+ }
+ void visitUserOp2(const Instruction &I) {
+ llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+ }
+
+ void processIntegerCallValue(const Instruction &I,
+ SDValue Value, bool IsSigned);
+
+ void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message);
+
+ /// If V is a function argument, create the corresponding DBG_VALUE machine
+ /// instruction for it now. At the end of instruction selection, these
+ /// instructions will be inserted into the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
+ DIExpression *Expr, DILocation *DL,
+ bool IsDbgDeclare, const SDValue &N);
+
+ /// Return the next block after MBB, or nullptr if there is none.
+ MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
+
+ /// Update the DAG and DAG builder with the relevant information after
+ /// a new root node has been created which could be a tail call.
+ void updateDAGForMaybeTailCall(SDValue MaybeTC);
+
+ /// Return the appropriate SDDbgValue based on N.
+ SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
+ DIExpression *Expr, const DebugLoc &dl,
+ unsigned DbgSDNodeOrder);
+
+ /// Lowers a CallInst to an external symbol.
+ void lowerCallToExternalSymbol(const CallInst &I, const char *FunctionName);
+};
+
+/// This struct represents the registers (physical or virtual)
+/// that a particular set of values is assigned to, and the type information
+/// about the values. The most common situation is to represent one value at a
+/// time, but struct or array values are handled element-wise as multiple
+/// values. The splitting of aggregates is performed recursively, so that we
+/// never have aggregate-typed registers. The values at this point do not
+/// necessarily have legal types, so each value may require one or more
+/// registers of some legal type.
+///
+struct RegsForValue {
+ /// The value types of the values, which may not be legal, and
+ /// may need to be promoted or synthesized from one or more registers.
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// The value types of the registers. This is the same size as ValueVTs and it
+ /// records, for each value, what the type of the assigned register or
+ /// registers is. (Individual values are never synthesized from more than one
+ /// type of register.)
+ ///
+ /// With virtual registers, the contents of RegVTs are redundant with TLI's
+ /// getRegisterType member function; however, with physical registers it is
+ /// necessary to have a separate record of the types.
+ SmallVector<MVT, 4> RegVTs;
+
+ /// This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ SmallVector<unsigned, 4> Regs;
+
+ /// This list holds the number of registers for each value.
+ SmallVector<unsigned, 4> RegCount;
+
+ /// Records whether this value needs to be treated in an ABI-dependent
+ /// manner, differently from normal type legalization.
+ Optional<CallingConv::ID> CallConv;
+
+ RegsForValue() = default;
+ RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
+ Optional<CallingConv::ID> CC = None);
+ RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL, unsigned Reg, Type *Ty,
+ Optional<CallingConv::ID> CC);
+
+ bool isABIMangled() const {
+ return CallConv.hasValue();
+ }
+
+ /// Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ RegCount.push_back(RHS.Regs.size());
+ }
+
+ /// Emit a series of CopyFromReg nodes that copy from this value and return
+ /// the result as a ValueVTs value. This uses Chain/Flag as the input and
+ /// updates them for the output Chain/Flag. If the Flag pointer is nullptr, no
+ /// flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+ const Value *V = nullptr) const;
+
+ /// Emit a series of CopyToReg nodes that copy the specified value into the
+ /// registers specified by this object. This uses Chain/Flag as the input and
+ /// updates them for the output Chain/Flag. If the Flag pointer is nullptr, no
+ /// flag is used. If V is not nullptr, then it is used in printing better
+ /// diagnostic messages on error.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl,
+ SDValue &Chain, SDValue *Flag, const Value *V = nullptr,
+ ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
+
+ /// Add this value to the specified inline asm node operand list. This adds
+ /// the code marker, the matching input operand index (if applicable), and the
+ /// number of values added.
+ void AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx, const SDLoc &dl,
+ SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
+
+ /// Check if the total RegCount is greater than one.
+ bool occupiesMultipleRegs() const {
+ return std::accumulate(RegCount.begin(), RegCount.end(), 0) > 1;
+ }
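+
+ // E.g. a RegCount of {1} gives false, while {2} or {1, 1} gives true.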
+
+ /// Return a list of registers and their sizes.
+ SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const;
+};
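+
+// A minimal round-trip sketch using the declarations above (Reg, Ty, dl,
+// NewVal and the DAG/FuncInfo context are assumed to come from the
+// surrounding lowering code):
+//   RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), Reg, Ty,
+//                    None);
+//   SDValue Chain = DAG.getEntryNode(), Flag;
+//   SDValue V = RFV.getCopyFromRegs(DAG, FuncInfo, dl, Chain, &Flag);
+//   RFV.getCopyToRegs(NewVal, DAG, dl, Chain, &Flag);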
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 0000000000000..bc10f76212394
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,963 @@
+//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/Printable.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SDNodeDbgValue.h"
+#include <cstdint>
+#include <iterator>
+
+using namespace llvm;
+
+static cl::opt<bool>
+VerboseDAGDumping("dag-dump-verbose", cl::Hidden,
+ cl::desc("Display more information when dumping selection "
+ "DAG nodes."));
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->getName(getMachineOpcode());
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE: return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_CLR: return "AtomicLoadClr";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+ case ISD::RegisterMask: return "RegisterMask";
+ case ISD::Constant:
+ if (cast<ConstantSDNode>(this)->isOpaque())
+ return "OpaqueConstant";
+ return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::SPONENTRY: return "SPONENTRY";
+ case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
+ case ISD::READ_REGISTER: return "READ_REGISTER";
+ case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EH_DWARF_CFA: return "EH_DWARF_CFA";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::TargetIndex: return "TargetIndex";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID, None);
+ else if (!G)
+ return "Unknown intrinsic";
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant:
+ if (cast<ConstantSDNode>(this)->isOpaque())
+ return "OpaqueTargetConstant";
+ return "TargetConstant";
+ case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::MCSymbol: return "MCSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::INLINEASM_BR: return "inlineasm_br";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::ANNOTATION_LABEL: return "annotation_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FMINNUM: return "fminnum";
+ case ISD::STRICT_FMINNUM: return "strict_fminnum";
+ case ISD::FMAXNUM: return "fmaxnum";
+ case ISD::STRICT_FMAXNUM: return "strict_fmaxnum";
+ case ISD::FMINNUM_IEEE: return "fminnum_ieee";
+ case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee";
+ case ISD::FMINIMUM: return "fminimum";
+ case ISD::FMAXIMUM: return "fmaximum";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::STRICT_FSQRT: return "strict_fsqrt";
+ case ISD::FCBRT: return "fcbrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::STRICT_FSIN: return "strict_fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::STRICT_FCOS: return "strict_fcos";
+ case ISD::FSINCOS: return "fsincos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::STRICT_FTRUNC: return "strict_ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::STRICT_FFLOOR: return "strict_ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::STRICT_FCEIL: return "strict_fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::STRICT_FRINT: return "strict_frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
+ case ISD::FROUND: return "fround";
+ case ISD::STRICT_FROUND: return "strict_fround";
+ case ISD::FEXP: return "fexp";
+ case ISD::STRICT_FEXP: return "strict_fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::STRICT_FEXP2: return "strict_fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::STRICT_FLOG: return "strict_flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::STRICT_FLOG2: return "strict_flog2";
+ case ISD::FLOG10: return "flog10";
+ case ISD::STRICT_FLOG10: return "strict_flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FSHL: return "fshl";
+ case ISD::FSHR: return "fshr";
+ case ISD::FADD: return "fadd";
+ case ISD::STRICT_FADD: return "strict_fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::STRICT_FSUB: return "strict_fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::STRICT_FMUL: return "strict_fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::STRICT_FDIV: return "strict_fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::STRICT_FMA: return "strict_fma";
+ case ISD::FMAD: return "fmad";
+ case ISD::FREM: return "frem";
+ case ISD::STRICT_FREM: return "strict_frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FCANONICALIZE: return "fcanonicalize";
+ case ISD::FPOW: return "fpow";
+ case ISD::STRICT_FPOW: return "strict_fpow";
+ case ISD::SMIN: return "smin";
+ case ISD::SMAX: return "smax";
+ case ISD::UMIN: return "umin";
+ case ISD::UMAX: return "umax";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::STRICT_FPOWI: return "strict_fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::SETCCCARRY: return "setcccarry";
+ case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::SPLAT_VECTOR: return "splat_vector";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::ADDCARRY: return "addcarry";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SUBCARRY: return "subcarry";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ case ISD::SADDSAT: return "saddsat";
+ case ISD::UADDSAT: return "uaddsat";
+ case ISD::SSUBSAT: return "ssubsat";
+ case ISD::USUBSAT: return "usubsat";
+
+ case ISD::SMULFIX: return "smulfix";
+ case ISD::SMULFIXSAT: return "smulfixsat";
+ case ISD::UMULFIX: return "umulfix";
+ case ISD::UMULFIXSAT: return "umulfixsat";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg";
+ case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg";
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::STRICT_FP_ROUND: return "strict_fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_EXTEND: return "fp_extend";
+ case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::ADDRSPACECAST: return "addrspacecast";
+ case ISD::FP16_TO_FP: return "fp16_to_fp";
+ case ISD::FP_TO_FP16: return "fp_to_fp16";
+ case ISD::LROUND: return "lround";
+ case ISD::STRICT_LROUND: return "strict_lround";
+ case ISD::LLROUND: return "llround";
+ case ISD::STRICT_LLROUND: return "strict_llround";
+ case ISD::LRINT: return "lrint";
+ case ISD::STRICT_LRINT: return "strict_lrint";
+ case ISD::LLRINT: return "llrint";
+ case ISD::STRICT_LLRINT: return "strict_llrint";
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // EH instructions
+ case ISD::CATCHRET: return "catchret";
+ case ISD::CLEANUPRET: return "cleanupret";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::MLOAD: return "masked_load";
+ case ISD::MSTORE: return "masked_store";
+ case ISD::MGATHER: return "masked_gather";
+ case ISD::MSCATTER: return "masked_scatter";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+ case ISD::DEBUGTRAP: return "debugtrap";
+ case ISD::LIFETIME_START: return "lifetime.start";
+ case ISD::LIFETIME_END: return "lifetime.end";
+ case ISD::GC_TRANSITION_START: return "gc_transition.start";
+ case ISD::GC_TRANSITION_END: return "gc_transition.end";
+ case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
+
+ // Bit manipulation
+ case ISD::ABS: return "abs";
+ case ISD::BITREVERSE: return "bitreverse";
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+
+ // Trampolines
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setueq";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
+ }
+ case ISD::VECREDUCE_FADD: return "vecreduce_fadd";
+ case ISD::VECREDUCE_STRICT_FADD: return "vecreduce_strict_fadd";
+ case ISD::VECREDUCE_FMUL: return "vecreduce_fmul";
+ case ISD::VECREDUCE_STRICT_FMUL: return "vecreduce_strict_fmul";
+ case ISD::VECREDUCE_ADD: return "vecreduce_add";
+ case ISD::VECREDUCE_MUL: return "vecreduce_mul";
+ case ISD::VECREDUCE_AND: return "vecreduce_and";
+ case ISD::VECREDUCE_OR: return "vecreduce_or";
+ case ISD::VECREDUCE_XOR: return "vecreduce_xor";
+ case ISD::VECREDUCE_SMAX: return "vecreduce_smax";
+ case ISD::VECREDUCE_SMIN: return "vecreduce_smin";
+ case ISD::VECREDUCE_UMAX: return "vecreduce_umax";
+ case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
+ case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
+ case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default: return "";
+ case ISD::PRE_INC: return "<pre-inc>";
+ case ISD::PRE_DEC: return "<pre-dec>";
+ case ISD::POST_INC: return "<post-inc>";
+ case ISD::POST_DEC: return "<post-dec>";
+ }
+}
+
+static Printable PrintNodeId(const SDNode &Node) {
+ return Printable([&Node](raw_ostream &OS) {
+#ifndef NDEBUG
+ OS << 't' << Node.PersistentId;
+#else
+ OS << (const void*)&Node;
+#endif
+ });
+}
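+
+// PrintNodeId composes with raw_ostream streaming, so callers can write e.g.
+//   OS << PrintNodeId(*N) << ": ";
+// which prints "t42: " in asserts builds and the node's address otherwise.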
+
+// Print the MMO with more information from the SelectionDAG.
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
+ const MachineFunction *MF, const Module *M,
+ const MachineFrameInfo *MFI,
+ const TargetInstrInfo *TII, LLVMContext &Ctx) {
+ ModuleSlotTracker MST(M);
+ if (MF)
+ MST.incorporateFunction(MF->getFunction());
+ SmallVector<StringRef, 0> SSNs;
+ MMO.print(OS, MST, SSNs, Ctx, MFI, TII);
+}
+
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
+ const SelectionDAG *G) {
+ if (G) {
+ const MachineFunction *MF = &G->getMachineFunction();
+ return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(),
+ &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(),
+ *G->getContext());
+ } else {
+ LLVMContext Ctx;
+ return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
+ /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
+
+LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+#endif
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (getFlags().hasNoUnsignedWrap())
+ OS << " nuw";
+
+ if (getFlags().hasNoSignedWrap())
+ OS << " nsw";
+
+ if (getFlags().hasExact())
+ OS << " exact";
+
+ if (getFlags().hasNoNaNs())
+ OS << " nnan";
+
+ if (getFlags().hasNoInfs())
+ OS << " ninf";
+
+ if (getFlags().hasNoSignedZeros())
+ OS << " nsz";
+
+ if (getFlags().hasAllowReciprocal())
+ OS << " arcp";
+
+ if (getFlags().hasAllowContract())
+ OS << " contract";
+
+ if (getFlags().hasApproximateFuncs())
+ OS << " afn";
+
+ if (getFlags().hasAllowReassociation())
+ OS << " reassoc";
+
+ if (getFlags().hasVectorReduction())
+ OS << " vector-reduction";
+
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ printMemOperand(OS, **i, G);
+ if (std::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEsingle())
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEdouble())
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().print(OS, false);
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ GADN->getGlobal()->printAsOperand(OS);
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) {
+ OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">";
+ if (unsigned TF = TI->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << printReg(R->getReg(),
+ G ? G->getSubtarget().getRegisterInfo() : nullptr);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ }
+ else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<";
+
+ printMemOperand(OS, *LD->getMemOperand(), G);
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *ST->getMemOperand(), G);
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MaskedLoadSDNode *MLd = dyn_cast<MaskedLoadSDNode>(this)) {
+ OS << "<";
+
+ printMemOperand(OS, *MLd->getMemOperand(), G);
+
+ bool doExt = true;
+ switch (MLd->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << MLd->getMemoryVT().getEVTString();
+
+ if (MLd->isExpandingLoad())
+ OS << ", expanding";
+
+ OS << ">";
+ } else if (const MaskedStoreSDNode *MSt = dyn_cast<MaskedStoreSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *MSt->getMemOperand(), G);
+
+ if (MSt->isTruncatingStore())
+ OS << ", trunc to " << MSt->getMemoryVT().getEVTString();
+
+ if (MSt->isCompressingStore())
+ OS << ", compressing";
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *M->getMemOperand(), G);
+ OS << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ int64_t offset = BA->getOffset();
+ OS << "<";
+ BA->getBlockAddress()->getFunction()->printAsOperand(OS, false);
+ OS << ", ";
+ BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false);
+ OS << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const AddrSpaceCastSDNode *ASC =
+ dyn_cast<AddrSpaceCastSDNode>(this)) {
+ OS << '['
+ << ASC->getSrcAddressSpace()
+ << " -> "
+ << ASC->getDestAddressSpace()
+ << ']';
+ } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) {
+ if (LN->hasOffset())
+ OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">";
+ }
+
+ if (VerboseDAGDumping) {
+ if (unsigned Order = getIROrder())
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+ if (!isa<ConstantSDNode>(this) && !isa<ConstantFPSDNode>(this))
+ OS << " # D:" << isDivergent();
+
+ if (G && !G->GetDbgValues(this).empty()) {
+ OS << " [NoOfDbgValues=" << G->GetDbgValues(this).size() << ']';
+ for (SDDbgValue *Dbg : G->GetDbgValues(this))
+ if (!Dbg->isInvalidated())
+ Dbg->print(OS);
+ } else if (getHasDebugValue())
+ OS << " [NoOfDbgValues>0]";
+ }
+}
+
+LLVM_DUMP_METHOD void SDDbgValue::print(raw_ostream &OS) const {
+ OS << " DbgVal(Order=" << getOrder() << ')';
+ if (isInvalidated()) OS << "(Invalidated)";
+ if (isEmitted()) OS << "(Emitted)";
+ switch (getKind()) {
+ case SDNODE:
+ if (getSDNode())
+ OS << "(SDNODE=" << PrintNodeId(*getSDNode()) << ':' << getResNo() << ')';
+ else
+ OS << "(SDNODE)";
+ break;
+ case CONST:
+ OS << "(CONST)";
+ break;
+ case FRAMEIX:
+ OS << "(FRAMEIX=" << getFrameIx() << ')';
+ break;
+ case VREG:
+ OS << "(VREG=" << getVReg() << ')';
+ break;
+ }
+ if (isIndirect()) OS << "(Indirect)";
+ OS << ":\"" << Var->getName() << '"';
+#ifndef NDEBUG
+ if (Expr->getNumElements())
+ Expr->dump();
+#endif
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SDDbgValue::dump() const {
+ if (isInvalidated())
+ return;
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
+/// Return true if this node is so simple that we should just print it inline
+/// if it appears as an operand.
+static bool shouldPrintInline(const SDNode &Node, const SelectionDAG *G) {
+ // Avoid excessive clutter when inline-printing nodes that have associated
+ // DbgValues in verbose mode.
+ if (VerboseDAGDumping && G && !G->GetDbgValues(&Node).empty())
+ return false;
+ if (Node.getOpcode() == ISD::EntryToken)
+ return false;
+ return Node.getNumOperands() == 0;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (const SDValue &Op : N->op_values()) {
+ if (shouldPrintInline(*Op.getNode(), G))
+ continue;
+ if (Op.getNode()->hasOneUse())
+ DumpNodes(Op.getNode(), indent+2, G);
+ }
+
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+LLVM_DUMP_METHOD void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = &*I;
+ if (!N->hasOneUse() && N != getRoot().getNode() &&
+ (!shouldPrintInline(*N, this) || N->use_empty()))
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+ dbgs() << "\n";
+
+ if (VerboseDAGDumping) {
+ if (DbgBegin() != DbgEnd())
+ dbgs() << "SDDbgValues:\n";
+ for (auto *Dbg : make_range(DbgBegin(), DbgEnd()))
+ Dbg->dump();
+ if (ByvalParmDbgBegin() != ByvalParmDbgEnd())
+ dbgs() << "Byval SDDbgValues:\n";
+ for (auto *Dbg : make_range(ByvalParmDbgBegin(), ByvalParmDbgEnd()))
+ Dbg->dump();
+ }
+ dbgs() << "\n";
+}
+#endif
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << PrintNodeId(*this) << ": ";
+ print_types(OS, G);
+ OS << " = " << getOperationName(G);
+ print_details(OS, G);
+}
+
+static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
+ const SDValue Value) {
+ if (!Value.getNode()) {
+ OS << "<null>";
+ return false;
+ } else if (shouldPrintInline(*Value.getNode(), G)) {
+ OS << Value->getOperationName(G) << ':';
+ Value->print_types(OS, G);
+ Value->print_details(OS, G);
+ return true;
+ } else {
+ OS << PrintNodeId(*Value.getNode());
+ if (unsigned RN = Value.getResNo())
+ OS << ':' << RN;
+ return false;
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+using VisitedSDNodeSet = SmallPtrSet<const SDNode *, 32>;
+
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N).second) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS.indent(indent);
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << " ";
+
+ const SDValue Op = N->getOperand(i);
+ bool printedInline = printOperand(OS, G, Op);
+ if (printedInline)
+ once.insert(Op.getNode());
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (const SDValue &Op : N->op_values())
+ DumpNodesr(OS, Op.getNode(), indent+2, G, once);
+}
+
+LLVM_DUMP_METHOD void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, nullptr, once);
+}
+
+LLVM_DUMP_METHOD void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+#endif
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ for (const SDValue &Op : N->op_values()) {
+ // Don't follow chain operands.
+ if (Op.getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+#endif
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ printr(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ printOperand(OS, G, getOperand(i));
+ }
+ if (DebugLoc DL = getDebugLoc()) {
+ OS << ", ";
+ DL.print(OS);
+ }
+}
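+
+// Putting print_types, getOperationName, print_details and the operand list
+// together, a dumped node line typically looks like (illustrative output):
+//   t14: i32,ch = load<(load 4 from %ir.p)> t0, t2, undef:i64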
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 0000000000000..1f07a241a8242
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,3665 @@
+//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "isel"
+
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
+STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
+STATISTIC(NumFastIselFailLowerArguments,
+ "Number of entry blocks where fast isel failed to lower arguments");
+
+static cl::opt<int> EnableFastISelAbort(
+ "fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction: 0 disables the abort, 1 will "
+ "abort for everything but args, calls and terminators, 2 will "
+ "also abort for argument lowering, and 3 will never fall back "
+ "to SelectionDAG."));
+
+static cl::opt<bool> EnableFastISelFallbackReport(
+ "fast-isel-report-on-fallback", cl::Hidden,
+ cl::desc("Emit a diagnostic when \"fast\" instruction selection "
+ "falls back to SelectionDAG."));
+
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+ cl::desc("use Machine Branch Probability Info"),
+ cl::init(true), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<std::string>
+FilterDAGBasicBlockName("filter-view-dags", cl::Hidden,
+ cl::desc("Only display the basic block whose name "
+ "matches this for all view-*-dags options"));
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post legalize types"
+ " dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+ cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false,
+ ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false,
+ ViewDAGCombineLT = false,
+ ViewISelDAGs = false, ViewSchedDAGs = false,
+ ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry<RegisterScheduler::FunctionPassCtor>
+ RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler>>
+ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler), cl::Hidden,
+ cl::desc("Instruction schedulers available (before register"
+ " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+ createDefaultScheduler);
+
+namespace llvm {
+
+ //===--------------------------------------------------------------------===//
+ /// This class is used by SelectionDAGISel to temporarily override
+ /// the optimization level on a per-function basis.
+ class OptLevelChanger {
+ SelectionDAGISel &IS;
+ CodeGenOpt::Level SavedOptLevel;
+ bool SavedFastISel;
+
+ public:
+ OptLevelChanger(SelectionDAGISel &ISel,
+ CodeGenOpt::Level NewOptLevel) : IS(ISel) {
+ SavedOptLevel = IS.OptLevel;
+ if (NewOptLevel == SavedOptLevel)
+ return;
+ IS.OptLevel = NewOptLevel;
+ IS.TM.setOptLevel(NewOptLevel);
+ LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
+ << IS.MF->getFunction().getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O"
+ << NewOptLevel << "\n");
+ SavedFastISel = IS.TM.Options.EnableFastISel;
+ if (NewOptLevel == CodeGenOpt::None) {
+ IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
+ LLVM_DEBUG(
+ dbgs() << "\tFastISel is "
+ << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
+ << "\n");
+ }
+ }
+
+ ~OptLevelChanger() {
+ if (IS.OptLevel == SavedOptLevel)
+ return;
+ LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function "
+ << IS.MF->getFunction().getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O"
+ << SavedOptLevel << "\n");
+ IS.OptLevel = SavedOptLevel;
+ IS.TM.setOptLevel(SavedOptLevel);
+ IS.TM.setFastISel(SavedFastISel);
+ }
+ };
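+
+ // A sketch of the intended RAII use (NewOptLevel is assumed to be derived
+ // from the function's attributes):
+ //   {
+ //     OptLevelChanger OLC(*this, NewOptLevel);
+ //     // ... select the function at NewOptLevel ...
+ //   } // destructor restores the original OptLevel and FastISel setting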
+
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetLowering *TLI = IS->TLI;
+ const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
+
+ // Try first to see if the Target has its own way of selecting a scheduler
+ if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) {
+ return SchedulerCtor(IS, OptLevel);
+ }
+
+ if (OptLevel == CodeGenOpt::None ||
+ (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
+ TLI->getSchedulingPreference() == Sched::Source)
+ return createSourceListDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::RegPressure)
+ return createBURRListDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::Hybrid)
+ return createHybridListDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::VLIW)
+ return createVLIWDAGScheduler(IS, OptLevel);
+ assert(TLI->getSchedulingPreference() == Sched::ILP &&
+ "Unknown sched type!");
+ return createILPListDAGScheduler(IS, OptLevel);
+ }
+
+} // end namespace llvm
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and this method is called to expand it into a sequence of
+// instructions, potentially also creating new basic blocks and control flow.
+// When new basic blocks are inserted and the edges from MBB to its successors
+// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
+// DenseMap.
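+//
+// For example, targets commonly use this hook for pseudo-instructions that
+// need new control flow; x86's CMOV pseudos, which are expanded here into
+// branches and new basic blocks, are one well-known user.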
+MachineBasicBlock *
+TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+#ifndef NDEBUG
+ dbgs() << "If a target marks an instruction with "
+ "'usesCustomInserter', it must implement "
+ "TargetLowering::EmitInstrWithCustomInserter!";
+#endif
+ llvm_unreachable(nullptr);
+}
+
+void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const {
+ assert(!MI.hasPostISelHook() &&
+ "If a target marks an instruction with 'hasPostISelHook', "
+ "it must implement TargetLowering::AdjustInstrPostInstrSelection!");
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ MachineFunctionPass(ID), TM(tm),
+ FuncInfo(new FunctionLoweringInfo()),
+ SwiftError(new SwiftErrorValueTracking()),
+ CurDAG(new SelectionDAG(tm, OL)),
+ SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)),
+ AA(), GFI(),
+ OptLevel(OL),
+ DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+SelectionDAGISel::~SelectionDAGISel() {
+ delete SDB;
+ delete CurDAG;
+ delete FuncInfo;
+ delete SwiftError;
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ if (OptLevel != CodeGenOpt::None)
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addRequired<StackProtector>();
+ AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// SplitCriticalSideEffectEdges - Look for critical edges carrying a PHI
+/// value that may trap. In this case we have to split the edge so that the
+/// path through the predecessor block that doesn't go to the phi block doesn't
+/// execute the possibly trapping instruction. If available, we pass domtree
+/// and loop info to be updated when we split critical edges. This is because
+/// SelectionDAGISel preserves these analyses.
+/// This is required for correctness, so it must be done even at -O0.
+///
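+/// For example (illustrative IR): an incoming PHI value such as the constant
+/// expression "sdiv (i32 1, i32 ptrtoint (i32* @g to i32))" may trap at
+/// runtime; splitting the critical edge ensures it is only evaluated on the
+/// path that actually reaches the PHI.
+///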
+static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
+ LoopInfo *LI) {
+  // Loop over blocks with phi nodes.
+ for (BasicBlock &BB : Fn) {
+ PHINode *PN = dyn_cast<PHINode>(BB.begin());
+ if (!PN) continue;
+
+ ReprocessBlock:
+    // For each block with a PHI node, check to see if any of the input values
+    // are potentially trapping constant expressions. Constant expressions are
+    // the only potentially trapping values that can occur as an argument to a
+    // PHI.
+ for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+ if (!CE || !CE->canTrap()) continue;
+
+ // The only case we have to worry about is when the edge is critical.
+ // Since this block has a PHI Node, we assume it has multiple input
+ // edges: check to see if the pred has multiple successors.
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // Okay, we have to split this edge.
+ SplitCriticalEdge(
+ Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
+ CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges());
+ goto ReprocessBlock;
+ }
+ }
+}
+
+static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
+ MachineModuleInfo &MMI) {
+ // Only needed for MSVC
+ if (!TT.isWindowsMSVCEnvironment())
+ return;
+
+ // If it's already set, nothing to do.
+ if (MMI.usesMSVCFloatingPoint())
+ return;
+
+ for (const Instruction &I : instructions(F)) {
+ if (I.getType()->isFPOrFPVectorTy()) {
+ MMI.setUsesMSVCFloatingPoint(true);
+ return;
+ }
+ for (const auto &Op : I.operands()) {
+ if (Op->getType()->isFPOrFPVectorTy()) {
+ MMI.setUsesMSVCFloatingPoint(true);
+ return;
+ }
+ }
+ }
+}
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ // If we already selected that function, we do not need to run SDISel.
+ if (mf.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected))
+ return false;
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
+ "-fast-isel-abort > 0 requires -fast-isel");
+
+ const Function &Fn = mf.getFunction();
+ MF = &mf;
+
+ // Reset the target options before resetting the optimization
+ // level below.
+ // FIXME: This is a horrible hack and should be processed via
+ // codegen looking at the optimization level explicitly when
+ // it wants to look at it.
+ TM.resetTargetOptions(Fn);
+ // Reset OptLevel to None for optnone functions.
+ CodeGenOpt::Level NewOptLevel = OptLevel;
+ if (OptLevel != CodeGenOpt::None && skipFunction(Fn))
+ NewOptLevel = CodeGenOpt::None;
+ OptLevelChanger OLC(*this, NewOptLevel);
+
+ TII = MF->getSubtarget().getInstrInfo();
+ TLI = MF->getSubtarget().getTargetLowering();
+ RegInfo = &MF->getRegInfo();
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+
+ LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+ SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
+
+ CurDAG->init(*MF, *ORE, this, LibInfo,
+ getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
+ FuncInfo->set(Fn, *MF, CurDAG);
+ SwiftError->setFunction(*MF);
+
+  // Now get the optional analyses if we want them.
+  // This is based on the possibly changed OptLevel (after optnone is taken
+  // into account). That's unfortunate but OK, because it just means we may
+  // skip asking for passes that were required anyway.
+
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ else
+ FuncInfo->BPI = nullptr;
+
+ if (OptLevel != CodeGenOpt::None)
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ else
+ AA = nullptr;
+
+ SDB->init(GFI, AA, LibInfo);
+
+ MF->setHasInlineAsm(false);
+
+ FuncInfo->SplitCSR = false;
+
+ // We split CSR if the target supports it for the given function
+ // and the function has only return exits.
+ if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) {
+ FuncInfo->SplitCSR = true;
+
+ // Collect all the return blocks.
+ for (const BasicBlock &BB : Fn) {
+ if (!succ_empty(&BB))
+ continue;
+
+ const Instruction *Term = BB.getTerminator();
+ if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term))
+ continue;
+
+ // Bail out if the exit block is not Return nor Unreachable.
+ FuncInfo->SplitCSR = false;
+ break;
+ }
+ }
+
+ MachineBasicBlock *EntryMBB = &MF->front();
+ if (FuncInfo->SplitCSR)
+ // This performs initialization so lowering for SplitCSR will be correct.
+ TLI->initializeSplitCSR(EntryMBB);
+
+ SelectAllBasicBlocks(Fn);
+ if (FastISelFailed && EnableFastISelFallbackReport) {
+ DiagnosticInfoISelFallback DiagFallback(Fn);
+ Fn.getContext().diagnose(DiagFallback);
+ }
+
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ // Note: it is important that this happens **before** the call to
+ // EmitLiveInCopies, since implementations can skip copies of unused
+ // registers. If we don't apply the reg fixups before, some registers may
+ // appear as unused and will be skipped, resulting in bad MI.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(),
+ E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ while (true) {
+ DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+ if (J == E)
+ break;
+ To = J->second;
+ }
+ // Make sure the new register has a sufficiently constrained register class.
+ if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
+ MRI.constrainRegClass(To, MRI.getRegClass(From));
+ // Replace it.
+
+ // Replacing one register with another won't touch the kill flags.
+ // We need to conservatively clear the kill flags as a kill on the old
+ // register might dominate existing uses of the new register.
+ if (!MRI.use_empty(To))
+ MRI.clearKillFlags(From);
+ MRI.replaceRegWith(From, To);
+ }
+
+ // If the first basic block in the function has live ins that need to be
+ // copied into vregs, emit the copies into the top of the block before
+ // emitting the code for the block.
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII);
+
+ // Insert copies in the entry block and the return blocks.
+ if (FuncInfo->SplitCSR) {
+ SmallVector<MachineBasicBlock*, 4> Returns;
+ // Collect all the return blocks.
+ for (MachineBasicBlock &MBB : mf) {
+ if (!MBB.succ_empty())
+ continue;
+
+ MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
+ if (Term != MBB.end() && Term->isReturn()) {
+ Returns.push_back(&MBB);
+ continue;
+ }
+ }
+ TLI->insertCopiesSplitCSR(EntryMBB, Returns);
+ }
+
+ DenseMap<unsigned, unsigned> LiveInMap;
+ if (!FuncInfo->ArgDbgValues.empty())
+ for (std::pair<unsigned, unsigned> LI : RegInfo->liveins())
+ if (LI.second)
+ LiveInMap.insert(LI);
+
+ // Insert DBG_VALUE instructions for function arguments to the entry block.
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
+ MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
+ bool hasFI = MI->getOperand(0).isFI();
+ Register Reg =
+ hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
+ if (Register::isPhysicalRegister(Reg))
+ EntryMBB->insert(EntryMBB->begin(), MI);
+ else {
+ MachineInstr *Def = RegInfo->getVRegDef(Reg);
+ if (Def) {
+ MachineBasicBlock::iterator InsertPos = Def;
+ // FIXME: VR def may not be in entry block.
+ Def->getParent()->insert(std::next(InsertPos), MI);
+ } else
+ LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg"
+ << Register::virtReg2Index(Reg) << "\n");
+ }
+
+ // If Reg is live-in then update debug info to track its copy in a vreg.
+ DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+ if (LDI != LiveInMap.end()) {
+ assert(!hasFI && "There's no handling of frame pointer updating here yet "
+ "- add if needed");
+ MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+ MachineBasicBlock::iterator InsertPos = Def;
+ const MDNode *Variable = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
+ DebugLoc DL = MI->getDebugLoc();
+ bool IsIndirect = MI->isIndirectDebugValue();
+ if (IsIndirect)
+ assert(MI->getOperand(1).getImm() == 0 &&
+ "DBG_VALUE with nonzero offset");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ // Def is never a terminator here, so it is ok to increment InsertPos.
+ BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
+ IsIndirect, LDI->second, Variable, Expr);
+
+      // If this vreg is directly copied into an exported register then
+      // that COPY instruction also needs a DBG_VALUE, if it is the only
+      // user of LDI->second.
+ MachineInstr *CopyUseMI = nullptr;
+ for (MachineRegisterInfo::use_instr_iterator
+ UI = RegInfo->use_instr_begin(LDI->second),
+ E = RegInfo->use_instr_end(); UI != E; ) {
+ MachineInstr *UseMI = &*(UI++);
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = nullptr; break;
+ }
+ if (CopyUseMI) {
+ // Use MI's debug location, which describes where Variable was
+ // declared, rather than whatever is attached to CopyUseMI.
+ MachineInstr *NewMI =
+ BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ CopyUseMI->getOperand(0).getReg(), Variable, Expr);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
+ }
+ }
+ }
+
+ // Determine if there are any calls in this machine function.
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ for (const auto &MBB : *MF) {
+ if (MFI.hasCalls() && MF->hasInlineAsm())
+ break;
+
+ for (const auto &MI : MBB) {
+ const MCInstrDesc &MCID = TII->get(MI.getOpcode());
+ if ((MCID.isCall() && !MCID.isReturn()) ||
+ MI.isStackAligningInlineAsm()) {
+ MFI.setHasCalls(true);
+ }
+ if (MI.isInlineAsm()) {
+ MF->setHasInlineAsm(true);
+ }
+ }
+ }
+
+ // Determine if there is a call to setjmp in the machine function.
+ MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
+
+  // Determine if floating point is used for MSVC.
+ computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI());
+
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ for (DenseMap<unsigned, unsigned>::iterator
+ I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ while (true) {
+ DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+ if (J == E) break;
+ To = J->second;
+ }
+ // Make sure the new register has a sufficiently constrained register class.
+ if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
+ MRI.constrainRegClass(To, MRI.getRegClass(From));
+ // Replace it.
+
+
+ // Replacing one register with another won't touch the kill flags.
+ // We need to conservatively clear the kill flags as a kill on the old
+ // register might dominate existing uses of the new register.
+ if (!MRI.use_empty(To))
+ MRI.clearKillFlags(From);
+ MRI.replaceRegWith(From, To);
+ }
+
+ TLI->finalizeLowering(*MF);
+
+ // Release function-specific state. SDB and CurDAG are already cleared
+ // at this point.
+ FuncInfo->clear();
+
+ LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
+ LLVM_DEBUG(MF->print(dbgs()));
+
+ return true;
+}
+
+static void reportFastISelFailure(MachineFunction &MF,
+ OptimizationRemarkEmitter &ORE,
+ OptimizationRemarkMissed &R,
+ bool ShouldAbort) {
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || ShouldAbort)
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (ShouldAbort)
+ report_fatal_error(R.getMsg());
+
+ ORE.emit(R);
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
+ // Allow creating illegal types during DAG building for the basic block.
+ CurDAG->NewNodesMustHaveLegalTypes = false;
+
+ // Lower the instructions. If a call is emitted as a tail call, cease emitting
+ // nodes for this block.
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
+ if (!ElidedArgCopyInstrs.count(&*I))
+ SDB->visit(*I);
+ }
+
+ // Make sure the root of the DAG is up-to-date.
+ CurDAG->setRoot(SDB->getControlRoot());
+ HadTailCall = SDB->HasTailCall;
+ SDB->resolveOrClearDbgInfo();
+ SDB->clear();
+
+ // Final step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG();
+}
+
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+ SmallPtrSet<SDNode*, 16> VisitedNodes;
+ SmallVector<SDNode*, 128> Worklist;
+
+ Worklist.push_back(CurDAG->getRoot().getNode());
+
+ KnownBits Known;
+
+ do {
+ SDNode *N = Worklist.pop_back_val();
+
+ // If we've already seen this node, ignore it.
+ if (!VisitedNodes.insert(N).second)
+ continue;
+
+ // Otherwise, add all chain operands to the worklist.
+ for (const SDValue &Op : N->op_values())
+ if (Op.getValueType() == MVT::Other)
+ Worklist.push_back(Op.getNode());
+
+ // If this is a CopyToReg with a vreg dest, process it.
+ if (N->getOpcode() != ISD::CopyToReg)
+ continue;
+
+ unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!Register::isVirtualRegister(DestReg))
+ continue;
+
+ // Ignore non-integer values.
+ SDValue Src = N->getOperand(2);
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isInteger())
+ continue;
+
+ unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+ Known = CurDAG->computeKnownBits(Src);
+ FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known);
+ } while (!Worklist.empty());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+ StringRef GroupName = "sdag";
+ StringRef GroupDescription = "Instruction Selection and Scheduling";
+ std::string BlockName;
+ bool MatchFilterBB = false; (void)MatchFilterBB;
+#ifndef NDEBUG
+ TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
+#endif
+
+  // Pre-type legalization allows creation of any node types.
+ CurDAG->NewNodesMustHaveLegalTypes = false;
+
+#ifndef NDEBUG
+ MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
+ FilterDAGBasicBlockName ==
+ FuncInfo->MBB->getBasicBlock()->getName());
+#endif
+#ifdef NDEBUG
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+ ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ ViewSUnitDAGs)
+#endif
+ {
+ BlockName =
+ (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
+ }
+ LLVM_DEBUG(dbgs() << "Initial selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ if (ViewDAGCombine1 && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+ // Run the DAG combiner in pre-legalize mode.
+ {
+ NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
+ }
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
+ LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ if (ViewLegalizeTypesDAGs && MatchFilterBB)
+ CurDAG->viewGraph("legalize-types input for " + BlockName);
+
+ bool Changed;
+ {
+ NamedRegionTimer T("legalize_types", "Type Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
+ LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ // Only allow creation of legal node types.
+ CurDAG->NewNodesMustHaveLegalTypes = true;
+
+ if (Changed) {
+ if (ViewDAGCombineLT && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("combine_lt", "DAG Combining after legalize types",
+ GroupName, GroupDescription, TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
+ }
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
+ LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+ }
+
+ {
+ NamedRegionTimer T("legalize_vec", "Vector Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeVectors();
+ }
+
+ if (Changed) {
+ LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ {
+ NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ CurDAG->LegalizeTypes();
+ }
+
+ LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ if (ViewDAGCombineLT && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
+ GroupName, GroupDescription, TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
+ }
+
+ LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+ }
+
+ if (ViewLegalizeDAGs && MatchFilterBB)
+ CurDAG->viewGraph("legalize input for " + BlockName);
+
+ {
+ NamedRegionTimer T("legalize", "DAG Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ CurDAG->Legalize();
+ }
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
+ LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ if (ViewDAGCombine2 && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+ // Run the DAG combiner in post-legalize mode.
+ {
+ NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
+ }
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
+ LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ if (OptLevel != CodeGenOpt::None)
+ ComputeLiveOutVRegInfo();
+
+ if (ViewISelDAGs && MatchFilterBB)
+ CurDAG->viewGraph("isel input for " + BlockName);
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ {
+ NamedRegionTimer T("isel", "Instruction Selection", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ DoInstructionSelection();
+ }
+
+ LLVM_DEBUG(dbgs() << "Selected selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ if (ViewSchedDAGs && MatchFilterBB)
+ CurDAG->viewGraph("scheduler input for " + BlockName);
+
+ // Schedule machine code.
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+ {
+ NamedRegionTimer T("sched", "Instruction Scheduling", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ Scheduler->Run(CurDAG, FuncInfo->MBB);
+ }
+
+ if (ViewSUnitDAGs && MatchFilterBB)
+ Scheduler->viewGraph();
+
+ // Emit machine code to BB. This can change 'BB' to the last block being
+ // inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
+ {
+ NamedRegionTimer T("emit", "Instruction Creation", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+
+ // FuncInfo->InsertPt is passed by reference and set to the end of the
+ // scheduled instructions.
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
+ }
+
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
+ // Free the scheduler state.
+ {
+ NamedRegionTimer T("cleanup", "Instruction Scheduling Cleanup", GroupName,
+ GroupDescription, TimePassesIsEnabled);
+ delete Scheduler;
+ }
+
+ // Free the SelectionDAG state, now that we're finished with it.
+ CurDAG->clear();
+}
+
+namespace {
+
+/// ISelUpdater - helper class to handle updates of the instruction selection
+/// graph.
+class ISelUpdater : public SelectionDAG::DAGUpdateListener {
+ SelectionDAG::allnodes_iterator &ISelPosition;
+
+public:
+ ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
+ : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
+
+ /// NodeDeleted - Handle nodes deleted from the graph. If the node being
+ /// deleted is the current ISelPosition node, update ISelPosition.
+ ///
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ if (ISelPosition == SelectionDAG::allnodes_iterator(N))
+ ++ISelPosition;
+ }
+};
+
+} // end anonymous namespace
+
+// This function is used to enforce the topological node id property
+// leveraged during instruction selection. Before selection, all nodes are
+// given a non-negative id such that every node has a larger id than its
+// operands. As this holds transitively, we can prune the check that a node N
+// is a predecessor of another node M by not recursively checking through M's
+// operands whenever N's id is larger than M's id. This significantly improves
+// the performance of various legality checks (e.g. IsLegalToFold /
+// UpdateChains).
+
+// However, when we fuse multiple nodes into a single node during selection,
+// we may induce a predecessor relationship between the inputs and outputs of
+// the distinct nodes being merged, violating the topological property. Should
+// a fused node have a successor which has yet to be selected, our legality
+// checks would be incorrect. To avoid this we mark all unselected successor
+// nodes, i.e. those with id != -1, as invalid for pruning by bit-negating
+// their ids (x => -(x+1)) and modify our pruning check to ignore the negative
+// ids of M. We use bit-negation to more clearly enforce that node id -1 can
+// only be achieved by selected nodes. As the conversion is reversible, the
+// original id can be recovered and topological pruning can still be leveraged
+// when looking for unselected nodes.
+// This method is called internally by all ISel replacement calls.
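+//
+// For example (illustrative): an unselected node with id 5 is invalidated to
+// -(5+1) = -6; getUninvalidatedNodeId() recovers 5 from -6, while a selected
+// node's id of -1 is left unchanged.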
+void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
+ SmallVector<SDNode *, 4> Nodes;
+ Nodes.push_back(Node);
+
+ while (!Nodes.empty()) {
+ SDNode *N = Nodes.pop_back_val();
+ for (auto *U : N->uses()) {
+ auto UId = U->getNodeId();
+ if (UId > 0) {
+ InvalidateNodeId(U);
+ Nodes.push_back(U);
+ }
+ }
+ }
+}
+
+// InvalidateNodeId - As discussed in EnforceNodeIdInvariant, mark a
+// NodeId with the equivalent node id which is invalid for topological
+// pruning.
+void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
+ int InvalidId = -(N->getNodeId() + 1);
+ N->setNodeId(InvalidId);
+}
+
+// getUninvalidatedNodeId - get original uninvalidated node id.
+int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) {
+ int Id = N->getNodeId();
+ if (Id < -1)
+ return -(Id + 1);
+ return Id;
+}
+
+void SelectionDAGISel::DoInstructionSelection() {
+ LLVM_DEBUG(dbgs() << "===== Instruction selection begins: "
+ << printMBBReference(*FuncInfo->MBB) << " '"
+ << FuncInfo->MBB->getName() << "'\n");
+
+ PreprocessISelDAG();
+
+ // Select target instructions for the DAG.
+ {
+ // Number all nodes with a topological order and set DAGSize.
+ DAGSize = CurDAG->AssignTopologicalOrder();
+
+ // Create a dummy node (which is not added to allnodes), that adds
+ // a reference to the root node, preventing it from being deleted,
+ // and tracking any changes of the root.
+ HandleSDNode Dummy(CurDAG->getRoot());
+ SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode());
+ ++ISelPosition;
+
+ // Make sure that ISelPosition gets properly updated when nodes are deleted
+ // in calls made from this function.
+ ISelUpdater ISU(*CurDAG, ISelPosition);
+
+      // The AllNodes list is now topologically sorted. Visit the
+      // nodes by starting at the end of the list (the root of the
+      // graph) and proceeding back toward the beginning (the entry
+      // node).
+ while (ISelPosition != CurDAG->allnodes_begin()) {
+ SDNode *Node = &*--ISelPosition;
+ // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+ // but there are currently some corner cases that it misses. Also, this
+ // makes it theoretically possible to disable the DAGCombiner.
+ if (Node->use_empty())
+ continue;
+
+#ifndef NDEBUG
+ SmallVector<SDNode *, 4> Nodes;
+ Nodes.push_back(Node);
+
+ while (!Nodes.empty()) {
+ auto N = Nodes.pop_back_val();
+ if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0)
+ continue;
+ for (const SDValue &Op : N->op_values()) {
+ if (Op->getOpcode() == ISD::TokenFactor)
+ Nodes.push_back(Op.getNode());
+ else {
+          // We rely on the topological ordering of node ids for checking for
+          // cycles when fusing nodes during selection. All unselected
+          // successors of an already selected node should have a negative id.
+          // This assertion will catch such cases. If this assertion triggers,
+          // it is likely you are using DAG-level Value/Node replacement
+          // functions (versus the equivalent ISEL replacement) in
+          // backend-specific selections. See the comment in
+          // EnforceNodeIdInvariant for more details.
+ assert(Op->getNodeId() != -1 &&
+ "Node has already selected predecessor node");
+ }
+ }
+ }
+#endif
+
+ // When we are using non-default rounding modes or FP exception behavior
+ // FP operations are represented by StrictFP pseudo-operations. For
+ // targets that do not (yet) understand strict FP operations directly,
+ // we convert them to normal FP opcodes instead at this point. This
+ // will allow them to be handled by existing target-specific instruction
+ // selectors.
+ if (Node->isStrictFPOpcode() &&
+ (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
+ != TargetLowering::Legal))
+ Node = CurDAG->mutateStrictFPToFP(Node);
+
+ LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
+ Node->dump(CurDAG));
+
+ Select(Node);
+ }
+
+ CurDAG->setRoot(Dummy.getValue());
+ }
+
+ LLVM_DEBUG(dbgs() << "\n===== Instruction selection ends:\n");
+
+ PostprocessISelDAG();
+}
+
+static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) {
+ for (const User *U : CPI->users()) {
+ if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = EHPtrCall->getIntrinsicID();
+ if (IID == Intrinsic::eh_exceptionpointer ||
+ IID == Intrinsic::eh_exceptioncode)
+ return true;
+ }
+ }
+ return false;
+}
+
+// The wasm.landingpad.index intrinsic associates a landing pad index number
+// with a catchpad instruction. Retrieve the landing pad index from the
+// intrinsic and store the mapping in the function.
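+//
+// For example (illustrative IR):
+//   call void @llvm.wasm.landingpad.index(token %catch, i32 0)
+// associates the landing pad containing %catch with index 0.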
+static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
+ const CatchPadInst *CPI) {
+ MachineFunction *MF = MBB->getParent();
+ // In case of single catch (...), we don't emit LSDA, so we don't need
+ // this information.
+ bool IsSingleCatchAllClause =
+ CPI->getNumArgOperands() == 1 &&
+ cast<Constant>(CPI->getArgOperand(0))->isNullValue();
+ if (!IsSingleCatchAllClause) {
+ // Create a mapping from landing pad label to landing pad index.
+ bool IntrFound = false;
+ for (const User *U : CPI->users()) {
+ if (const auto *Call = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = Call->getIntrinsicID();
+ if (IID == Intrinsic::wasm_landingpad_index) {
+ Value *IndexArg = Call->getArgOperand(1);
+ int Index = cast<ConstantInt>(IndexArg)->getZExtValue();
+ MF->setWasmLandingPadIndex(MBB, Index);
+ IntrFound = true;
+ break;
+ }
+ }
+ }
+ assert(IntrFound && "wasm.landingpad.index intrinsic not found!");
+ (void)IntrFound;
+ }
+}
+
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+bool SelectionDAGISel::PrepareEHLandingPad() {
+ MachineBasicBlock *MBB = FuncInfo->MBB;
+ const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn();
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
+ const TargetRegisterClass *PtrRC =
+ TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
+
+ auto Pers = classifyEHPersonality(PersonalityFn);
+
+ // Catchpads have one live-in register, which typically holds the exception
+ // pointer or code.
+ if (isFuncletEHPersonality(Pers)) {
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
+ if (hasExceptionPointerOrCodeUser(CPI)) {
+ // Get or create the virtual register to hold the pointer or code. Mark
+ // the live in physreg and copy into the vreg.
+ MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
+ assert(EHPhysReg && "target lacks exception pointer register");
+ MBB->addLiveIn(EHPhysReg);
+ unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::COPY), VReg)
+ .addReg(EHPhysReg, RegState::Kill);
+ }
+ }
+ return true;
+ }
+
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MCSymbol *Label = MF->addLandingPad(MBB);
+
+ const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ .addSym(Label);
+
+ if (Pers == EHPersonality::Wasm_CXX) {
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI()))
+ mapWasmLandingPadIndex(MBB, CPI);
+ } else {
+ // Assign the call site to the landing pad's begin label.
+ MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+ // Mark exception register as live in.
+ if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
+ FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ // Mark exception selector register as live in.
+ if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
+ FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ }
+
+ return true;
+}
+
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
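+///
+/// For example (illustrative): a GEP whose only use is folded into the
+/// addressing mode of a neighboring load is side-effect free and need not be
+/// emitted separately.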
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+ FunctionLoweringInfo *FuncInfo) {
+ return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+ !I->isTerminator() && // Terminators aren't folded.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !I->isEHPad() && // EH pad instructions aren't folded.
+ !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+}
+
+/// Collect llvm.dbg.declare information. This is done after argument lowering
+/// in case the declarations refer to arguments.
+static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
+ MachineFunction *MF = FuncInfo->MF;
+ const DataLayout &DL = MF->getDataLayout();
+ for (const BasicBlock &BB : *FuncInfo->Fn) {
+ for (const Instruction &I : BB) {
+ const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I);
+ if (!DI)
+ continue;
+
+ assert(DI->getVariable() && "Missing variable");
+ assert(DI->getDebugLoc() && "Missing location");
+ const Value *Address = DI->getAddress();
+ if (!Address)
+ continue;
+
+ // Look through casts and constant offset GEPs. These mostly come from
+ // inalloca.
+ APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0);
+ Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ // Check if the variable is a static alloca or a byval or inalloca
+ // argument passed in memory. If it is not, then we will ignore this
+ // intrinsic and handle this during isel like dbg.value.
+ int FI = std::numeric_limits<int>::max();
+ if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
+ auto SI = FuncInfo->StaticAllocaMap.find(AI);
+ if (SI != FuncInfo->StaticAllocaMap.end())
+ FI = SI->second;
+ } else if (const auto *Arg = dyn_cast<Argument>(Address))
+ FI = FuncInfo->getArgumentFrameIndex(Arg);
+
+ if (FI == std::numeric_limits<int>::max())
+ continue;
+
+ DIExpression *Expr = DI->getExpression();
+ if (Offset.getBoolValue())
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
+ Offset.getZExtValue());
+ MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc());
+ }
+ }
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ FastISelFailed = false;
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = nullptr;
+ if (TM.Options.EnableFastISel) {
+ LLVM_DEBUG(dbgs() << "Enabling fast-isel\n");
+ FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
+ }
+
+ ReversePostOrderTraversal<const Function*> RPOT(&Fn);
+
+ // Lower arguments up front. An RPO iteration always visits the entry block
+ // first.
+ assert(*RPOT.begin() == &Fn.getEntryBlock());
+ ++NumEntryBlocks;
+
+ // Set up FuncInfo for ISel. Entry blocks never have PHIs.
+ FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
+ FuncInfo->InsertPt = FuncInfo->MBB->begin();
+
+ CurDAG->setFunctionLoweringInfo(FuncInfo);
+
+ if (!FastIS) {
+ LowerArguments(Fn);
+ } else {
+ // See if fast isel can lower the arguments.
+ FastIS->startNewBlock();
+ if (!FastIS->lowerArguments()) {
+ FastISelFailed = true;
+ // Fast isel failed to lower these arguments
+ ++NumFastIselFailLowerArguments;
+
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Fn.getSubprogram(),
+ &Fn.getEntryBlock());
+ R << "FastISel didn't lower all arguments: "
+ << ore::NV("Prototype", Fn.getType());
+ reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1);
+
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(nullptr);
+ }
+
+ bool Inserted = SwiftError->createEntriesInEntryBlock(SDB->getCurDebugLoc());
+
+ if (FastIS && Inserted)
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+
+ processDbgDeclares(FuncInfo);
+
+ // Iterate over all basic blocks in the function.
+ StackProtector &SP = getAnalysis<StackProtector>();
+ for (const BasicBlock *LLVMBB : RPOT) {
+ if (OptLevel != CodeGenOpt::None) {
+ bool AllPredsVisited = true;
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ if (!FuncInfo->VisitedBBs.count(*PI)) {
+ AllPredsVisited = false;
+ break;
+ }
+ }
+
+ if (AllPredsVisited) {
+ for (const PHINode &PN : LLVMBB->phis())
+ FuncInfo->ComputePHILiveOutRegInfo(&PN);
+ } else {
+ for (const PHINode &PN : LLVMBB->phis())
+ FuncInfo->InvalidatePHILiveOutRegInfo(&PN);
+ }
+
+ FuncInfo->VisitedBBs.insert(LLVMBB);
+ }
+
+ BasicBlock::const_iterator const Begin =
+ LLVMBB->getFirstNonPHI()->getIterator();
+ BasicBlock::const_iterator const End = LLVMBB->end();
+ BasicBlock::const_iterator BI = End;
+
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ if (!FuncInfo->MBB)
+ continue; // Some blocks like catchpads have no code or MBB.
+
+ // Insert new instructions after any phi or argument setup code.
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+
+ // Setup an EH landing-pad block.
+ FuncInfo->ExceptionPointerVirtReg = 0;
+ FuncInfo->ExceptionSelectorVirtReg = 0;
+ if (LLVMBB->isEHPad())
+ if (!PrepareEHLandingPad())
+ continue;
+
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS) {
+ if (LLVMBB != &Fn.getEntryBlock())
+ FastIS->startNewBlock();
+
+ unsigned NumFastIselRemaining = std::distance(Begin, End);
+
+ // Pre-assign swifterror vregs.
+ SwiftError->preassignVRegs(FuncInfo->MBB, Begin, End);
+
+ // Do FastISel on as many instructions as possible.
+ for (; BI != Begin; --BI) {
+ const Instruction *Inst = &*std::prev(BI);
+
+ // If we no longer require this instruction, skip it.
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo) ||
+ ElidedArgCopyInstrs.count(Inst)) {
+ --NumFastIselRemaining;
+ continue;
+ }
+
+ // Bottom-up: reset the insert pos at the top, after any local-value
+ // instructions.
+ FastIS->recomputeInsertPt();
+
+ // Try to select the instruction with FastISel.
+ if (FastIS->selectInstruction(Inst)) {
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ // If fast isel succeeded, skip over all the folded instructions, and
+ // then see if there is a load right before the selected instructions.
+ // Try to fold the load if so.
+ const Instruction *BeforeInst = Inst;
+ while (BeforeInst != &*Begin) {
+ BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
+ if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ break;
+ }
+ if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() &&
+ FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
+ // If we succeeded, don't re-select the load.
+ BI = std::next(BasicBlock::const_iterator(BeforeInst));
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ }
+ continue;
+ }
+
+ FastISelFailed = true;
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ // We cannot separate out GCrelocates to their own blocks since we need
+ // to keep track of gc-relocates for a particular gc-statepoint. This is
+ // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before
+ // visitGCRelocate.
+ if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) &&
+ !isGCResult(Inst)) {
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Inst->getDebugLoc(), LLVMBB);
+
+ R << "FastISel missed call";
+
+ if (R.isEnabled() || EnableFastISelAbort) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << *Inst;
+
+ R << ": " << InstStr.str();
+ }
+
+ reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 2);
+
+ if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
+ !Inst->use_empty()) {
+ unsigned &R = FuncInfo->ValueMap[Inst];
+ if (!R)
+ R = FuncInfo->CreateRegs(Inst);
+ }
+
+ bool HadTailCall = false;
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
+ SelectBasicBlock(Inst->getIterator(), BI, HadTailCall);
+
+ // If the call was emitted as a tail call, we're done with the block.
+ // We also need to delete any previously emitted instructions.
+ if (HadTailCall) {
+ FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end());
+ --BI;
+ break;
+ }
+
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+ NumFastIselRemaining = RemainingNow;
+ continue;
+ }
+
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Inst->getDebugLoc(), LLVMBB);
+
+ bool ShouldAbort = EnableFastISelAbort;
+ if (Inst->isTerminator()) {
+ // Use a different message for terminator misses.
+ R << "FastISel missed terminator";
+ // Don't abort for terminator unless the level is really high
+ ShouldAbort = (EnableFastISelAbort > 2);
+ } else {
+ R << "FastISel missed";
+ }
+
+ if (R.isEnabled() || EnableFastISelAbort) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << *Inst;
+ R << ": " << InstStr.str();
+ }
+
+ reportFastISelFailure(*MF, *ORE, R, ShouldAbort);
+
+ NumFastIselFailures += NumFastIselRemaining;
+ break;
+ }
+
+ FastIS->recomputeInsertPt();
+ }
+
+ if (SP.shouldEmitSDCheck(*LLVMBB)) {
+ bool FunctionBasedInstrumentation =
+ TLI->getSSPStackGuardCheck(*Fn.getParent());
+ SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB],
+ FunctionBasedInstrumentation);
+ }
+
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
+ if (Begin != BI) {
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
+
+ // But if FastISel was run, we already selected some of the block.
+ // If we emitted a tail-call, we need to delete any previously emitted
+ // instruction that follows it.
+ if (HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
+ FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
+ }
+
+ if (FastIS)
+ FastIS->finishBasicBlock();
+ FinishBasicBlock();
+ FuncInfo->PHINodesToUpdate.clear();
+ ElidedArgCopyInstrs.clear();
+ }
+
+ SP.copyToMachineFrameInfo(MF->getFrameInfo());
+
+ SwiftError->propagateVRegs();
+
+ delete FastIS;
+ SDB->clearDanglingDebugInfo();
+ SDB->SPDescriptor.resetPerFunctionState();
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq,
+/// return true if MI + TSeq is also a partial terminator sequence.
+///
+/// A terminator sequence is a sequence of MachineInstrs which, at this point
+/// in lowering, copy vregs into physical registers that are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
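+///
+/// For example (illustrative, x86-like):
+///   %1 = COPY %0; $edi = COPY %1; TCRETURNdi @f
+/// Both copies are part of the terminator sequence and must stay attached to
+/// the tail call when the block is split.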
+static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
+ // If we do not have a copy or an implicit def, we return true if and only if
+ // MI is a debug value.
+ if (!MI.isCopy() && !MI.isImplicitDef())
+    // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the
+ // physical registers if there is debug info associated with the terminator
+ // of our mbb. We want to include said debug info in our terminator
+ // sequence, so we return true in that case.
+ return MI.isDebugValue();
+
+ // We have left the terminator sequence if we are not doing one of the
+ // following:
+ //
+ // 1. Copying a vreg into a physical register.
+ // 2. Copying a vreg into a vreg.
+ // 3. Defining a register via an implicit def.
+
+ // OPI should always be a register definition...
+ MachineInstr::const_mop_iterator OPI = MI.operands_begin();
+ if (!OPI->isReg() || !OPI->isDef())
+ return false;
+
+ // Defining any register via an implicit def is always ok.
+ if (MI.isImplicitDef())
+ return true;
+
+ // Grab the copy source...
+ MachineInstr::const_mop_iterator OPI2 = OPI;
+ ++OPI2;
+ assert(OPI2 != MI.operands_end()
+ && "Should have a copy implying we should have 2 arguments.");
+
+ // Make sure that the copy dest is not a vreg when the copy source is a
+ // physical register.
+ if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
+ Register::isPhysicalRegister(OPI2->getReg())))
+ return false;
+
+ return true;
+}
+
+/// Find the split point at which to splice the end of BB into its success stack
+/// protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point cannot travel across basic
+/// blocks. Luckily, SelectionDAG always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator, creating a
+/// ``Terminator Sequence''. This function searches for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+static MachineBasicBlock::iterator
+FindSplitPointForStackProtector(MachineBasicBlock *BB) {
+ MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
+ if (SplitPoint == BB->begin())
+ return SplitPoint;
+
+ MachineBasicBlock::iterator Start = BB->begin();
+ MachineBasicBlock::iterator Previous = SplitPoint;
+ --Previous;
+
+ while (MIIsInTerminatorSequence(*Previous)) {
+ SplitPoint = Previous;
+ if (Previous == Start)
+ break;
+ --Previous;
+ }
+
+ return SplitPoint;
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+ LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e;
+ ++i) dbgs()
+ << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first
+ << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+
+  // Next, now that we know which MBB is the last one the LLVM BB expanded
+  // into, update PHI nodes in successors.
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ continue;
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
+ }
+
+ // Handle stack protector.
+ if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+ // The target provides a guard check function. There is no need to
+ // generate error handling code or to split current basic block.
+ MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+
+ // Add load and check to the basicblock.
+ FuncInfo->MBB = ParentMBB;
+ FuncInfo->InsertPt =
+ FindSplitPointForStackProtector(ParentMBB);
+ SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Clear the Per-BB State.
+ SDB->SPDescriptor.resetPerBBState();
+ } else if (SDB->SPDescriptor.shouldEmitStackProtector()) {
+ MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+ MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB();
+
+    // Find the split point to split the parent mbb. At the same time, copy all
+    // physical registers used in the tail of the parent mbb into virtual
+    // registers before the split point and back into physical registers after
+    // the split point. This prevents us from needing to deal with live-ins and
+    // many other register allocation issues caused by splitting the parent
+    // mbb. The register allocator will clean up said virtual copies later on.
+ MachineBasicBlock::iterator SplitPoint =
+ FindSplitPointForStackProtector(ParentMBB);
+
+ // Splice the terminator of ParentMBB into SuccessMBB.
+ SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
+ SplitPoint,
+ ParentMBB->end());
+
+ // Add compare/jump on neq/jump to the parent BB.
+ FuncInfo->MBB = ParentMBB;
+ FuncInfo->InsertPt = ParentMBB->end();
+ SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // CodeGen Failure MBB if we have not codegened it yet.
+ MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB();
+ if (FailureMBB->empty()) {
+ FuncInfo->MBB = FailureMBB;
+ FuncInfo->InsertPt = FailureMBB->end();
+ SDB->visitSPDescriptorFailure(SDB->SPDescriptor);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Clear the Per-BB State.
+ SDB->SPDescriptor.resetPerBBState();
+ }
+
+ // Lower each BitTestBlock.
+ for (auto &BTB : SDB->SL->BitTestCases) {
+ // Lower header first, if it wasn't already lowered
+ if (!BTB.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = BTB.Parent;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitBitTestHeader(BTB, FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ BranchProbability UnhandledProb = BTB.Prob;
+ for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
+ UnhandledProb -= BTB.Cases[j].ExtraProb;
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = BTB.Cases[j].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+
+ // If all cases cover a contiguous range, it is not necessary to jump to
+ // the default block after the last bit test fails. This is because the
+ // range check during bit test header creation has guaranteed that every
+ // case here doesn't go outside the range. In this case, there is no need
+ // to perform the last bit test, as it will always be true. Instead, make
+ // the second-to-last bit-test fall through to the target of the last bit
+ // test, and delete the last bit test.
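+      // For example (illustrative): if the cases cover exactly the values
+      // 0-3, then once the earlier bit tests have failed, the final test is
+      // provably true and can be dropped.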
+
+ MachineBasicBlock *NextMBB;
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range: fall through to the
+ // target of the final bit test.
+ NextMBB = BTB.Cases[j + 1].TargetBB;
+ } else if (j + 1 == ej) {
+ // For the last bit test, fall through to Default.
+ NextMBB = BTB.Default;
+ } else {
+ // Otherwise, fall through to the next bit test.
+ NextMBB = BTB.Cases[j + 1].ThisBB;
+ }
+
+ SDB->visitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j],
+ FuncInfo->MBB);
+
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Since we're not going to use the final bit test, remove it.
+ BTB.Cases.pop_back();
+ break;
+ }
+ }
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // This is "default" BB. We have two jumps to it. From "header" BB and
+ // from last "case" BB, unless the latter was skipped.
+ if (PHIBB == BTB.Default) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent);
+ if (!BTB.ContiguousRange) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(BTB.Cases.back().ThisBB);
+ }
+ }
+ // One of "cases" BB.
+ for (unsigned j = 0, ej = BTB.Cases.size();
+ j != ej; ++j) {
+ MachineBasicBlock* cBB = BTB.Cases[j].ThisBB;
+ if (cBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB);
+ }
+ }
+ }
+ SDB->SL->BitTestCases.clear();
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+  // whether the PHI is a successor of the range check MBB or the jump table MBB.
+ for (unsigned i = 0, e = SDB->SL->JTCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->SL->JTCases[i].first.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SL->JTCases[i].first.HeaderBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTableHeader(SDB->SL->JTCases[i].second,
+ SDB->SL->JTCases[i].first, FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SL->JTCases[i].second.MBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTable(SDB->SL->JTCases[i].second);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // "default" BB. We can go there only from header BB.
+ if (PHIBB == SDB->SL->JTCases[i].second.Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->SL->JTCases[i].first.HeaderBB);
+ // JT BB. Just iterate over successors here
+ if (FuncInfo->MBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
+ }
+ }
+ SDB->SL->JTCases.clear();
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SDB->SL->SwitchCases.size(); i != e; ++i) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SL->SwitchCases[i].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+
+ // Determine the unique successors.
+ SmallVector<MachineBasicBlock *, 2> Succs;
+ Succs.push_back(SDB->SL->SwitchCases[i].TrueBB);
+ if (SDB->SL->SwitchCases[i].TrueBB != SDB->SL->SwitchCases[i].FalseBB)
+ Succs.push_back(SDB->SL->SwitchCases[i].FalseBB);
+
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
+ SDB->visitSwitchCase(SDB->SL->SwitchCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ FuncInfo->MBB = Succs[i];
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // FuncInfo->MBB may have been removed from the CFG if a branch was
+ // constant folded.
+ if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+ for (MachineBasicBlock::iterator
+ MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end();
+ MBBI != MBBE && MBBI->isPHI(); ++MBBI) {
+ MachineInstrBuilder PHI(*MF, MBBI);
+ // This value for this PHI node is recorded in PHINodesToUpdate.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != FuncInfo->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (FuncInfo->PHINodesToUpdate[pn].first == PHI) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ SDB->SL->SwitchCases.clear();
+}
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it, otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+ return ISHeuristic(this, OptLevel);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
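+///
+/// For example, if the pattern wants mask 255 but the combiner shrank the AND
+/// immediate to 15 because the upper bits of the input were already known
+/// zero, the remaining needed bits (255 & ~15) are known zero and the match
+/// still succeeds.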
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+ // If the actual AND mask keeps bits that the desired mask would clear,
+ // this doesn't match.
+ if (!ActualMask.isSubsetOf(DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+ // If the actual OR mask sets bits that the desired mask doesn't, this
+ // doesn't match.
+ if (!ActualMask.isSubsetOf(DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ KnownBits Known = CurDAG->computeKnownBits(LHS);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if (NeededMask.isSubsetOf(Known.One))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
+ const SDLoc &DL) {
+ std::vector<SDValue> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+ Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
+ Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
+
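+ // The remaining operands come in groups: a flag word that encodes the
+ // operand kind and its register count, followed by that many SDValues.
+ // Non-memory groups are copied through verbatim; memory operands are
+ // rewritten by the target below.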
+ unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
+
+ while (i != e) {
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+ if (!InlineAsm::isMemKind(Flags)) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i,
+ InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+ i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ } else {
+ assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ "Memory operand with multiple values?");
+
+ unsigned TiedToOperand;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) {
+ // We need the constraint ID from the operand this is tied to.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ for (; TiedToOperand; --TiedToOperand) {
+ CurOp += InlineAsm::getNumOperandRegisters(Flags)+1;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ }
+ }
+
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDValue> SelOps;
+ unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags);
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps))
+ report_fatal_error("Could not match memory address. Inline asm"
+ " failure!");
+
+ // Add this to the output node.
+ unsigned NewFlags =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+ NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
+ Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
+ Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ i += 2;
+ }
+ }
+
+ // Add the glue input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
+
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
+/// SDNode.
+///
+static SDNode *findGlueUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDUse &Use = I.getUse();
+ if (Use.getResNo() == FlagResNo)
+ return Use.getUser();
+ }
+ return nullptr;
+}
+
+/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path
+/// beyond "ImmedUse". We may ignore chains as they are checked separately.
+static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
+ bool IgnoreChains) {
+ SmallPtrSet<const SDNode *, 16> Visited;
+ SmallVector<const SDNode *, 16> WorkList;
+ // Only check if we have non-immediate uses of Def.
+ if (ImmedUse->isOnlyUserOf(Def))
+ return false;
+
+ // We don't care about paths to Def that go through ImmedUse so mark it
+ // visited and mark non-def operands as used.
+ Visited.insert(ImmedUse);
+ for (const SDValue &Op : ImmedUse->op_values()) {
+ SDNode *N = Op.getNode();
+ // Ignore chain deps (they are validated by
+ // HandleMergeInputChains) and immediate uses
+ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
+ continue;
+ if (!Visited.insert(N).second)
+ continue;
+ WorkList.push_back(N);
+ }
+
+ // Initialize worklist to operands of Root.
+ if (Root != ImmedUse) {
+ for (const SDValue &Op : Root->op_values()) {
+ SDNode *N = Op.getNode();
+ // Ignore chains (they are validated by HandleMergeInputChains)
+ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
+ continue;
+ if (!Visited.insert(N).second)
+ continue;
+ WorkList.push_back(N);
+ }
+ }
+
+ return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true);
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+ return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+ CodeGenOpt::Level OptLevel,
+ bool IgnoreChains) {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ // If Root can somehow reach N through a path that doesn't contain U, then
+ // folding N would create a cycle. E.g., in the following diagram, Root can
+ // reach N through X. If N is folded into Root, then
+ // X is both a predecessor and a successor of U.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ / //
+ // \ / //
+ // [Root*] //
+ //
+ // * indicates nodes to be folded together.
+ //
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
+ // check if it might reach N.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ \ //
+ // \ | //
+ // [Root*] | //
+ // ^ | //
+ // f | //
+ // | / //
+ // [Y] / //
+ // ^ / //
+ // f / //
+ // | / //
+ // [GU] //
+ //
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
+ // a cycle in the scheduling graph.
+
+ // If the node has glue, walk down the graph to the "lowest" node in the
+ // glued set.
+ EVT VT = Root->getValueType(Root->getNumValues()-1);
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (!GU)
+ break;
+ Root = GU;
+ VT = Root->getValueType(Root->getNumValues()-1);
+
+ // If our query node has a glue result with a use, we've walked up it. If
+ // the user (which has already been selected) has a chain or indirectly uses
+ // the chain, HandleMergeInputChains will not consider it. Because of
+ // this, we cannot ignore chains in this predicate.
+ IgnoreChains = false;
+ }
+
+ return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
+}
+
+void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) {
+ SDLoc DL(N);
+
+ std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+ SelectInlineAsmMemoryOperands(Ops, DL);
+
+ const EVT VTs[] = {MVT::Other, MVT::Glue};
+ SDValue New = CurDAG->getNode(Branch ? ISD::INLINEASM_BR : ISD::INLINEASM,
+ DL, VTs, Ops);
+ New->setNodeId(-1);
+ ReplaceUses(N, New.getNode());
+ CurDAG->RemoveDeadNode(N);
+}
+
+void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
+ SDLoc dl(Op);
+ MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ Register Reg =
+ TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0),
+ CurDAG->getMachineFunction());
+ SDValue New = CurDAG->getCopyFromReg(
+ Op->getOperand(0), dl, Reg, Op->getValueType(0));
+ New->setNodeId(-1);
+ ReplaceUses(Op, New.getNode());
+ CurDAG->RemoveDeadNode(Op);
+}
+
+void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
+ SDLoc dl(Op);
+ MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ Register Reg = TLI->getRegisterByName(RegStr->getString().data(),
+ Op->getOperand(2).getValueType(),
+ CurDAG->getMachineFunction());
+ SDValue New = CurDAG->getCopyToReg(
+ Op->getOperand(0), dl, Reg, Op->getOperand(2));
+ New->setNodeId(-1);
+ ReplaceUses(Op, New.getNode());
+ CurDAG->RemoveDeadNode(Op);
+}
+
+void SelectionDAGISel::Select_UNDEF(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
+}
+
+/// GetVBR - decode a vbr encoding whose top bit is set.
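+/// The encoding stores 7 bits per byte, least significant group first, with
+/// the top bit of each byte set when more bytes follow. For example, 300 is
+/// encoded as [0xAC, 0x02]: (0xAC & 127) | (0x02 << 7) == 300.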
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+ assert(Val >= 128 && "Not a VBR");
+ Val &= 127; // Remove first vbr bit.
+
+ unsigned Shift = 7;
+ uint64_t NextBits;
+ do {
+ NextBits = MatcherTable[Idx++];
+ Val |= (NextBits&127) << Shift;
+ Shift += 7;
+ } while (NextBits & 128);
+
+ return Val;
+}
+
+/// When a match is complete, this method updates uses of interior chain results
+/// to use the new results.
+void SelectionDAGISel::UpdateChains(
+ SDNode *NodeToMatch, SDValue InputChain,
+ SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
+ SmallVector<SDNode*, 4> NowDeadNodes;
+
+ // Now that all the normal results are replaced, we replace the chain and
+ // glue results if present.
+ if (!ChainNodesMatched.empty()) {
+ assert(InputChain.getNode() &&
+ "Matched input chains but didn't produce a chain");
+ // Loop over all of the nodes we matched that produced a chain result.
+ // Replace all the chain results with the final chain we ended up with.
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ SDNode *ChainNode = ChainNodesMatched[i];
+ // If ChainNode is null, it's because we replaced it on a previous
+ // iteration and we cleared it out of the map. Just skip it.
+ if (!ChainNode)
+ continue;
+
+ assert(ChainNode->getOpcode() != ISD::DELETED_NODE &&
+ "Deleted node left in chain");
+
+ // Don't replace the results of the root node if we're doing a
+ // MorphNodeTo.
+ if (ChainNode == NodeToMatch && isMorphNodeTo)
+ continue;
+
+ SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+ if (ChainVal.getValueType() == MVT::Glue)
+ ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+ assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ SelectionDAG::DAGNodeDeletedListener NDL(
+ *CurDAG, [&](SDNode *N, SDNode *E) {
+ std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N,
+ static_cast<SDNode *>(nullptr));
+ });
+ if (ChainNode->getOpcode() != ISD::TokenFactor)
+ ReplaceUses(ChainVal, InputChain);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (ChainNode != NodeToMatch && ChainNode->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
+ NowDeadNodes.push_back(ChainNode);
+ }
+ }
+
+ if (!NowDeadNodes.empty())
+ CurDAG->RemoveDeadNodes(NowDeadNodes);
+
+ LLVM_DEBUG(dbgs() << "ISEL: Match complete!\n");
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain. The
+/// input vector contains a list of all of the chained nodes that we match. We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and, if so, create a TokenFactor node that will
+/// be used as the input node chain for the generated nodes.
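+///
+/// A merge is rejected if, after looking through TokenFactors, one of the
+/// candidate input chains is itself a successor of a matched node, since the
+/// new TokenFactor would then be both a predecessor and a successor of that
+/// node.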
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SelectionDAG *CurDAG) {
+
+ SmallPtrSet<const SDNode *, 16> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ SmallVector<SDValue, 3> InputChains;
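+ // Bound on how many nodes the cycle check below may visit (passed as
+ // MaxSteps to hasPredecessorHelper).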
+ unsigned int Max = 8192;
+
+ // Quick exit on trivial merge.
+ if (ChainNodesMatched.size() == 1)
+ return ChainNodesMatched[0]->getOperand(0);
+
+ // Add chains that aren't already added (internal). Peek through
+ // token factors.
+ std::function<void(const SDValue)> AddChains = [&](const SDValue V) {
+ if (V.getValueType() != MVT::Other)
+ return;
+ if (V->getOpcode() == ISD::EntryToken)
+ return;
+ if (!Visited.insert(V.getNode()).second)
+ return;
+ if (V->getOpcode() == ISD::TokenFactor) {
+ for (const SDValue &Op : V->op_values())
+ AddChains(Op);
+ } else
+ InputChains.push_back(V);
+ };
+
+ for (auto *N : ChainNodesMatched) {
+ Worklist.push_back(N);
+ Visited.insert(N);
+ }
+
+ while (!Worklist.empty())
+ AddChains(Worklist.pop_back_val()->getOperand(0));
+
+ // Skip the search if there are no chain dependencies.
+ if (InputChains.size() == 0)
+ return CurDAG->getEntryNode();
+
+ // If one of these chains is a successor of input, we must have a
+ // node that is both the predecessor and successor of the
+ // to-be-merged nodes. Fail.
+ Visited.clear();
+ for (SDValue V : InputChains)
+ Worklist.push_back(V.getNode());
+
+ for (auto *N : ChainNodesMatched)
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
+ return SDValue();
+
+ // Return merged chain.
+ if (InputChains.size() == 1)
+ return InputChains[0];
+ return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
+ MVT::Other, InputChains);
+}
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+ ArrayRef<SDValue> Ops, unsigned EmitNodeInfo) {
+ // It is possible we're using MorphNodeTo to replace a node with no
+ // normal results with one that has a normal result (or we could be
+ // adding a chain) and the input could have glue and chains as well.
+ // In this case we need to shift the operands down.
+ // FIXME: This is a horrible hack and broken in obscure cases, no worse
+ // than the old isel though.
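+ // For example, if Node produced (i32, ch) and the morphed node produces
+ // (i32, i32, ch), the chain result moves from slot 1 to slot 2 and its old
+ // uses must be redirected below.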
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
+
+ unsigned NTMNumResults = Node->getNumValues();
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
+ if (NTMNumResults != 1 &&
+ Node->getValueType(NTMNumResults-2) == MVT::Other)
+ OldChainResultNo = NTMNumResults-2;
+ } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+ OldChainResultNo = NTMNumResults-1;
+
+ // Call the underlying SelectionDAG routine to do the transmogrification. Note
+ // that this deletes operands of the old node that become dead.
+ SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ }
+
+ unsigned ResNumResults = Res->getNumValues();
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ ReplaceUses(SDValue(Node, OldGlueResultNo),
+ SDValue(Res, ResNumResults - 1));
+
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
+ --ResNumResults;
+
+ // Move the chain reference if needed.
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+ (unsigned)OldChainResultNo != ResNumResults-1)
+ ReplaceUses(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults - 1));
+
+ // Otherwise, no in-place update happened because an equivalent node already
+ // exists. Replace uses of the old node with the new one.
+ if (Res != Node) {
+ ReplaceNode(Node, Res);
+ } else {
+ EnforceNodeIdInvariant(Res);
+ }
+
+ return Res;
+}
+
+/// CheckSame - Implements OP_CheckSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
+ // Accept if it is exactly the same as a previously recorded node.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ return N == RecordedNodes[RecNo].first;
+}
+
+/// CheckChildSame - Implements OP_CheckChildXSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
+ RecordedNodes);
+}
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ const SelectionDAGISel &SDISel) {
+ return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OP_CheckNodePredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ const SelectionDAGISel &SDISel, SDNode *N) {
+ return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDNode *N) {
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ return N->getOpcode() == Opc;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const TargetLowering *TLI, const DataLayout &DL) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (N.getValueType() == VT) return true;
+
+ // Handle the case when VT is iPTR.
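+ // (iPTR is a placeholder in the matcher table for the target's pointer
+ // type, which is only known via the DataLayout at runtime.)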
+ return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering *TLI, const DataLayout &DL,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI,
+ DL);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ return cast<CondCodeSDNode>(N)->get() ==
+ (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ if (2 >= N.getNumOperands())
+ return false;
+ return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2));
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (cast<VTSDNode>(N)->getVT() == VT)
+ return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ return C && C->getSExtValue() == Val;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::AND) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::OR) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current node. If the current predicate is known to
+/// fail, set Result=true and return anything. If the current predicate is
+/// known to pass, set Result=false and return the MatcherIndex to continue
+/// with. If the current predicate cannot be evaluated here, set Result=false
+/// and return the original MatcherIndex unchanged so that the caller pushes a
+/// scope and evaluates the full predicate chain.
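+///
+/// For example, OPC_CheckOpcode can be evaluated directly against N, while an
+/// opcode such as OPC_MoveParent mutates interpreter state and so falls into
+/// the "cannot evaluate" case.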
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+ unsigned Index, SDValue N,
+ bool &Result,
+ const SelectionDAGISel &SDISel,
+ SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
+ switch (Table[Index++]) {
+ default:
+ Result = false;
+ return Index-1; // Could not evaluate this predicate.
+ case SelectionDAGISel::OPC_CheckSame:
+ Result = !::CheckSame(Table, Index, N, RecordedNodes);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Same:
+ case SelectionDAGISel::OPC_CheckChild1Same:
+ case SelectionDAGISel::OPC_CheckChild2Same:
+ case SelectionDAGISel::OPC_CheckChild3Same:
+ Result = !::CheckChildSame(Table, Index, N, RecordedNodes,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPatternPredicate:
+ Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPredicate:
+ Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckOpcode:
+ Result = !::CheckOpcode(Table, Index, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckType:
+ Result = !::CheckType(Table, Index, N, SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
+ return Index;
+ case SelectionDAGISel::OPC_CheckTypeRes: {
+ unsigned Res = Table[Index++];
+ Result = !::CheckType(Table, Index, N.getValue(Res), SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
+ return Index;
+ }
+ case SelectionDAGISel::OPC_CheckChild0Type:
+ case SelectionDAGISel::OPC_CheckChild1Type:
+ case SelectionDAGISel::OPC_CheckChild2Type:
+ case SelectionDAGISel::OPC_CheckChild3Type:
+ case SelectionDAGISel::OPC_CheckChild4Type:
+ case SelectionDAGISel::OPC_CheckChild5Type:
+ case SelectionDAGISel::OPC_CheckChild6Type:
+ case SelectionDAGISel::OPC_CheckChild7Type:
+ Result = !::CheckChildType(
+ Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(),
+ Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type);
+ return Index;
+ case SelectionDAGISel::OPC_CheckCondCode:
+ Result = !::CheckCondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild2CondCode:
+ Result = !::CheckChild2CondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckValueType:
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
+ return Index;
+ case SelectionDAGISel::OPC_CheckInteger:
+ Result = !::CheckInteger(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Integer:
+ case SelectionDAGISel::OPC_CheckChild1Integer:
+ case SelectionDAGISel::OPC_CheckChild2Integer:
+ case SelectionDAGISel::OPC_CheckChild3Integer:
+ case SelectionDAGISel::OPC_CheckChild4Integer:
+ Result = !::CheckChildInteger(Table, Index, N,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer);
+ return Index;
+ case SelectionDAGISel::OPC_CheckAndImm:
+ Result = !::CheckAndImm(Table, Index, N, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckOrImm:
+ Result = !::CheckOrImm(Table, Index, N, SDISel);
+ return Index;
+ }
+}
+
+namespace {
+
+struct MatchScope {
+ /// FailIndex - If this match fails, this is the index to continue with.
+ unsigned FailIndex;
+
+ /// NodeStack - The node stack when the scope was formed.
+ SmallVector<SDValue, 4> NodeStack;
+
+ /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+ unsigned NumRecordedNodes;
+
+ /// NumMatchedMemRefs - The number of matched memref entries.
+ unsigned NumMatchedMemRefs;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
+
+ /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+ bool HasChainNodesMatched;
+};
+
+/// A DAG update listener to keep the matching state
+/// (i.e. RecordedNodes and MatchScope) up to date if the target is allowed to
+/// change the DAG while matching. The X86 addressing-mode matcher is an
+/// example of this.
+class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
+{
+ SDNode **NodeToMatch;
+ SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes;
+ SmallVectorImpl<MatchScope> &MatchScopes;
+
+public:
+ MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch,
+ SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN,
+ SmallVectorImpl<MatchScope> &MS)
+ : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch),
+ RecordedNodes(RN), MatchScopes(MS) {}
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ // Some early-returns here to avoid the search if we deleted the node or
+ // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we
+ // do, so it's unnecessary to update matching state at that point).
+ // Neither of these can occur currently because we only install this
+ // update listener while matching a complex pattern.
+ if (!E || E->isMachineOpcode())
+ return;
+ // Check if NodeToMatch was updated.
+ if (N == *NodeToMatch)
+ *NodeToMatch = E;
+ // Performing linear search here does not matter because we almost never
+ // run this code. You'd have to have a CSE during complex pattern
+ // matching.
+ for (auto &I : RecordedNodes)
+ if (I.first.getNode() == N)
+ I.first.setNode(E);
+
+ for (auto &I : MatchScopes)
+ for (auto &J : I.NodeStack)
+ if (J.getNode() == N)
+ J.setNode(E);
+ }
+};
+
+} // end anonymous namespace
+
+void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
+ const unsigned char *MatcherTable,
+ unsigned TableSize) {
+ // FIXME: Should these even be selected? Handle these cases in the caller?
+ switch (NodeToMatch->getOpcode()) {
+ default:
+ break;
+ case ISD::EntryToken: // These nodes remain the same.
+ case ISD::BasicBlock:
+ case ISD::Register:
+ case ISD::RegisterMask:
+ case ISD::HANDLENODE:
+ case ISD::MDNODE_SDNODE:
+ case ISD::TargetConstant:
+ case ISD::TargetConstantFP:
+ case ISD::TargetConstantPool:
+ case ISD::TargetFrameIndex:
+ case ISD::TargetExternalSymbol:
+ case ISD::MCSymbol:
+ case ISD::TargetBlockAddress:
+ case ISD::TargetJumpTable:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::TargetGlobalAddress:
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::EH_LABEL:
+ case ISD::ANNOTATION_LABEL:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ NodeToMatch->setNodeId(-1); // Mark selected.
+ return;
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0));
+ CurDAG->RemoveDeadNode(NodeToMatch);
+ return;
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
+ Select_INLINEASM(NodeToMatch,
+ NodeToMatch->getOpcode() == ISD::INLINEASM_BR);
+ return;
+ case ISD::READ_REGISTER:
+ Select_READ_REGISTER(NodeToMatch);
+ return;
+ case ISD::WRITE_REGISTER:
+ Select_WRITE_REGISTER(NodeToMatch);
+ return;
+ case ISD::UNDEF:
+ Select_UNDEF(NodeToMatch);
+ return;
+ }
+
+ assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+ // Set up the node stack with NodeToMatch as the only node on the stack.
+ SmallVector<SDValue, 8> NodeStack;
+ SDValue N = SDValue(NodeToMatch, 0);
+ NodeStack.push_back(N);
+
+ // MatchScopes - Scopes used when matching, if a match failure happens, this
+ // indicates where to continue checking.
+ SmallVector<MatchScope, 8> MatchScopes;
+
+ // RecordedNodes - This is the set of nodes that have been recorded by the
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
+ // MatchedMemRefs - This is the set of MemRef's we've seen in the input
+ // pattern.
+ SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+
+ // These are the current input chain and glue for use when generating nodes.
+ // Various Emit operations change these. For example, emitting a copytoreg
+ // uses and updates these.
+ SDValue InputChain, InputGlue;
+
+ // ChainNodesMatched - If a pattern matches nodes that have input/output
+ // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
+ // which ones they are. The result is captured into this list so that we can
+ // update the chain results when the pattern is complete.
+ SmallVector<SDNode*, 3> ChainNodesMatched;
+
+ LLVM_DEBUG(dbgs() << "ISEL: Starting pattern match\n");
+
+ // Determine where to start the interpreter. Normally we start at opcode #0,
+ // but if the state machine starts with an OPC_SwitchOpcode, then we
+ // accelerate the first lookup (which is guaranteed to be hot) with the
+ // OpcodeOffset table.
+ unsigned MatcherIndex = 0;
+
+ if (!OpcodeOffset.empty()) {
+ // Already computed the OpcodeOffset table, just index into it.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ LLVM_DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
+
+ } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+ // Otherwise, the table isn't computed, but the state machine does start
+ // with an OPC_SwitchOpcode instruction. Populate the table now, since this
+ // is the first time we're selecting an instruction.
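+ // Each case in the table is a VBR-encoded case size, a 16-bit
+ // little-endian opcode, and then CaseSize bytes of matcher code; a case
+ // size of zero terminates the switch.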
+ unsigned Idx = 1;
+ while (true) {
+ // Get the size of this case.
+ unsigned CaseSize = MatcherTable[Idx++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+ if (CaseSize == 0) break;
+
+ // Get the opcode, add the index to the table.
+ uint16_t Opc = MatcherTable[Idx++];
+ Opc |= (unsigned short)MatcherTable[Idx++] << 8;
+ if (Opc >= OpcodeOffset.size())
+ OpcodeOffset.resize((Opc+1)*2);
+ OpcodeOffset[Opc] = Idx;
+ Idx += CaseSize;
+ }
+
+ // Okay, do the lookup for the first opcode.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ }
+
+ while (true) {
+ assert(MatcherIndex < TableSize && "Invalid index");
+#ifndef NDEBUG
+ unsigned CurrentOpcodeIndex = MatcherIndex;
+#endif
+ BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ switch (Opcode) {
+ case OPC_Scope: {
+ // Okay, the semantics of this operation are that we should push a scope
+ // then evaluate the first child. However, pushing a scope only to have
+ // the first check fail (which then pops it) is inefficient. If we can
+ // determine immediately that the first check (or first several) will
+ // immediately fail, don't even bother pushing a scope for them.
+ unsigned FailIndex;
+
+ while (true) {
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+ // Found the end of the scope with no match.
+ if (NumToSkip == 0) {
+ FailIndex = 0;
+ break;
+ }
+
+ FailIndex = MatcherIndex+NumToSkip;
+
+ unsigned MatcherIndexOfPredicate = MatcherIndex;
+ (void)MatcherIndexOfPredicate; // silence warning.
+
+ // If we can't evaluate this predicate without pushing a scope (e.g. if
+ // it is a 'MoveParent') or if the predicate succeeds on this node, we
+ // push the scope and evaluate the full predicate chain.
+ bool Result;
+ MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
+ Result, *this, RecordedNodes);
+ if (!Result)
+ break;
+
+ LLVM_DEBUG(
+ dbgs() << " Skipped scope entry (due to false predicate) at "
+ << "index " << MatcherIndexOfPredicate << ", continuing at "
+ << FailIndex << "\n");
+ ++NumDAGIselRetries;
+
+ // Otherwise, we know that this case of the Scope is guaranteed to fail;
+ // move on to the next case.
+ MatcherIndex = FailIndex;
+ }
+
+ // If the whole scope failed to match, bail.
+ if (FailIndex == 0) break;
+
+ // Push a MatchScope which indicates where to go if the first child fails
+ // to match.
+ MatchScope NewEntry;
+ NewEntry.FailIndex = FailIndex;
+ NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
+ NewEntry.NumRecordedNodes = RecordedNodes.size();
+ NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
+ NewEntry.InputChain = InputChain;
+ NewEntry.InputGlue = InputGlue;
+ NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
+ MatchScopes.push_back(NewEntry);
+ continue;
+ }
+ case OPC_RecordNode: {
+ // Remember this node, it may end up being an operand in the pattern.
+ SDNode *Parent = nullptr;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
+ continue;
+ }
+
+ case OPC_RecordChild0: case OPC_RecordChild1:
+ case OPC_RecordChild2: case OPC_RecordChild3:
+ case OPC_RecordChild4: case OPC_RecordChild5:
+ case OPC_RecordChild6: case OPC_RecordChild7: {
+ unsigned ChildNo = Opcode-OPC_RecordChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
+ continue;
+ }
+ case OPC_RecordMemRef:
+ if (auto *MN = dyn_cast<MemSDNode>(N))
+ MatchedMemRefs.push_back(MN->getMemOperand());
+ else {
+ LLVM_DEBUG(dbgs() << "Expected MemSDNode "; N->dump(CurDAG);
+ dbgs() << '\n');
+ }
+
+ continue;
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
+ if (N->getNumOperands() != 0 &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
+ continue;
+
+ case OPC_MoveChild: {
+ unsigned ChildNo = MatcherTable[MatcherIndex++];
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveChild0: case OPC_MoveChild1:
+ case OPC_MoveChild2: case OPC_MoveChild3:
+ case OPC_MoveChild4: case OPC_MoveChild5:
+ case OPC_MoveChild6: case OPC_MoveChild7: {
+ unsigned ChildNo = Opcode-OPC_MoveChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveParent:
+ // Pop the current node off the NodeStack.
+ NodeStack.pop_back();
+ assert(!NodeStack.empty() && "Node stack imbalance!");
+ N = NodeStack.back();
+ continue;
+
+ case OPC_CheckSame:
+ if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+ continue;
+
+ case OPC_CheckChild0Same: case OPC_CheckChild1Same:
+ case OPC_CheckChild2Same: case OPC_CheckChild3Same:
+ if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes,
+ Opcode-OPC_CheckChild0Same))
+ break;
+ continue;
+
+ case OPC_CheckPatternPredicate:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ continue;
+ case OPC_CheckPredicate:
+ if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+ N.getNode()))
+ break;
+ continue;
+ case OPC_CheckPredicateWithOperands: {
+ unsigned OpNum = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Operands;
+
+ for (unsigned i = 0; i < OpNum; ++i)
+ Operands.push_back(RecordedNodes[MatcherTable[MatcherIndex++]].first);
+
+ unsigned PredNo = MatcherTable[MatcherIndex++];
+ if (!CheckNodePredicateWithOperands(N.getNode(), PredNo, Operands))
+ break;
+ continue;
+ }
+ case OPC_CheckComplexPat: {
+ unsigned CPNum = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+
+ // If target can modify DAG during matching, keep the matching state
+ // consistent.
+ std::unique_ptr<MatchStateUpdater> MSU;
+ if (ComplexPatternFuncMutatesDAG())
+ MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes,
+ MatchScopes));
+
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
+ RecordedNodes))
+ break;
+ continue;
+ }
+ case OPC_CheckOpcode:
+ if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+ continue;
+
+ case OPC_CheckType:
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout()))
+ break;
+ continue;
+
+ case OPC_CheckTypeRes: {
+ unsigned Res = MatcherTable[MatcherIndex++];
+ if (!::CheckType(MatcherTable, MatcherIndex, N.getValue(Res), TLI,
+ CurDAG->getDataLayout()))
+ break;
+ continue;
+ }
+
+ case OPC_SwitchOpcode: {
+ unsigned CurNodeOpcode = N.getOpcode();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (true) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+
+ // If the opcode matches, then we will execute this case.
+ if (CurNodeOpcode == Opc)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ LLVM_DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to "
+ << MatcherIndex << "\n");
+ continue;
+ }
+
+ case OPC_SwitchType: {
+ MVT CurNodeVT = N.getSimpleValueType();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (true) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (CaseVT == MVT::iPTR)
+ CaseVT = TLI->getPointerTy(CurDAG->getDataLayout());
+
+ // If the VT matches, then we will execute this case.
+ if (CurNodeVT == CaseVT)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ LLVM_DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ << "] from " << SwitchStart << " to " << MatcherIndex
+ << '\n');
+ continue;
+ }
+ case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+ case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+ case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+ case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout(),
+ Opcode - OPC_CheckChild0Type))
+ break;
+ continue;
+ case OPC_CheckCondCode:
+ if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckChild2CondCode:
+ if (!::CheckChild2CondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckValueType:
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout()))
+ break;
+ continue;
+ case OPC_CheckInteger:
+ if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckChild0Integer: case OPC_CheckChild1Integer:
+ case OPC_CheckChild2Integer: case OPC_CheckChild3Integer:
+ case OPC_CheckChild4Integer:
+ if (!::CheckChildInteger(MatcherTable, MatcherIndex, N,
+ Opcode-OPC_CheckChild0Integer)) break;
+ continue;
+ case OPC_CheckAndImm:
+ if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckOrImm:
+ if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckImmAllOnesV:
+ if (!ISD::isBuildVectorAllOnes(N.getNode())) break;
+ continue;
+ case OPC_CheckImmAllZerosV:
+ if (!ISD::isBuildVectorAllZeros(N.getNode())) break;
+ continue;
+
+ case OPC_CheckFoldableChainNode: {
+ assert(NodeStack.size() != 1 && "No parent node");
+ // Verify that all intermediate nodes between the root and this one have
+ // a single use.
+ bool HasMultipleUses = false;
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
+ if (!NodeStack[i].getNode()->hasOneUse()) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+
+ // Check to see that the target thinks this is profitable to fold and that
+ // we can fold it without inducing cycles in the graph.
+ if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch) ||
+ !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch, OptLevel,
+ true/*We validate our own chains*/))
+ break;
+
+ continue;
+ }
+ case OPC_EmitInteger: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch),
+ VT), nullptr));
+ continue;
+ }
+ case OPC_EmitRegister: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), nullptr));
+ continue;
+ }
+ case OPC_EmitRegister2: {
+ // For targets w/ more than 256 register names, the register enum
+ // values are stored in two bytes in the matcher table (just like
+ // opcodes).
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RegNo |= MatcherTable[MatcherIndex++] << 8;
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), nullptr));
+ continue;
+ }
+
+ case OPC_EmitConvertToTarget: {
+ // Convert from IMM/FPIMM to target version.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
+ SDValue Imm = RecordedNodes[RecNo].first;
+
+ if (Imm->getOpcode() == ISD::Constant) {
+ const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
+ Imm = CurDAG->getTargetConstant(*Val, SDLoc(NodeToMatch),
+ Imm.getValueType());
+ } else if (Imm->getOpcode() == ISD::ConstantFP) {
+ const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+ Imm = CurDAG->getTargetConstantFP(*Val, SDLoc(NodeToMatch),
+ Imm.getValueType());
+ }
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
+ case OPC_EmitMergeInputChains1_1: // OPC_EmitMergeInputChains, 1, 1
+ case OPC_EmitMergeInputChains1_2: { // OPC_EmitMergeInputChains, 1, 2
+ // These are space-optimized forms of OPC_EmitMergeInputChains.
+ assert(!InputChain.getNode() &&
+ "EmitMergeInputChains should be the first chain producing node");
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ unsigned RecNo = Opcode - OPC_EmitMergeInputChains1_0;
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (!InputChain.getNode())
+ break; // Failed to merge.
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains: {
+ assert(!InputChain.getNode() &&
+ "EmitMergeInputChains should be the first chain producing node");
+ // This node gets a list of nodes we matched in the input that have
+ // chains. We want to token factor all of the input chains to these nodes
+ // together. However, if any of the input chains is actually one of the
+ // nodes matched in this pattern, then we have an intra-match reference.
+ // Ignore these because the newly token factored chain should not refer to
+ // the old nodes.
+ unsigned NumChains = MatcherTable[MatcherIndex++];
+ assert(NumChains != 0 && "Can't TF zero chains");
+
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ for (unsigned i = 0; i != NumChains; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+ }
+
+ // If the inner loop broke out, the match fails.
+ if (ChainNodesMatched.empty())
+ break;
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (!InputChain.getNode())
+ break; // Failed to merge.
+
+ continue;
+ }
+
+ case OPC_EmitCopyToReg:
+ case OPC_EmitCopyToReg2: {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
+ unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+ if (Opcode == OPC_EmitCopyToReg2)
+ DestPhysReg |= MatcherTable[MatcherIndex++] << 8;
+
+ if (!InputChain.getNode())
+ InputChain = CurDAG->getEntryNode();
+
+ InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch),
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
+ continue;
+ }
+
+ case OPC_EmitNodeXForm: {
+ unsigned XFormNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr));
+ continue;
+ }
+ case OPC_Coverage: {
+ // This is emitted right before MorphNode/EmitNode.
+ // So it should be safe to assume that this node has been selected
+ unsigned index = MatcherTable[MatcherIndex++];
+ index |= (MatcherTable[MatcherIndex++] << 8);
+ dbgs() << "COVERED: " << getPatternForIndex(index) << "\n";
+ dbgs() << "INCLUDED: " << getIncludePathForIndex(index) << "\n";
+ continue;
+ }
+
+ case OPC_EmitNode: case OPC_MorphNodeTo:
+ case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2:
+ case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: {
+ uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+ TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+ // Get the result VT list.
+ unsigned NumVTs;
+ // If this is one of the compressed forms, get the number of VTs based
+ // on the Opcode. Otherwise read the next byte from the table.
+ if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2)
+ NumVTs = Opcode - OPC_MorphNodeTo0;
+ else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2)
+ NumVTs = Opcode - OPC_EmitNode0;
+ else
+ NumVTs = MatcherTable[MatcherIndex++];
+ SmallVector<EVT, 4> VTs;
+ for (unsigned i = 0; i != NumVTs; ++i) {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (VT == MVT::iPTR)
+ VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy;
+ VTs.push_back(VT);
+ }
+
+ if (EmitNodeInfo & OPFL_Chain)
+ VTs.push_back(MVT::Other);
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
+ // This is hot code, so optimize the two most common cases of 1 and 2
+ // results.
+ SDVTList VTList;
+ if (VTs.size() == 1)
+ VTList = CurDAG->getVTList(VTs[0]);
+ else if (VTs.size() == 2)
+ VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+ else
+ VTList = CurDAG->getVTList(VTs);
+
+ // Get the operand list.
+ unsigned NumOps = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+ Ops.push_back(RecordedNodes[RecNo].first);
+ }
+
+ // If there are variadic operands to add, handle them now.
+ if (EmitNodeInfo & OPFL_VariadicInfo) {
+ // Determine the start index to copy from.
+ unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+ FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+ assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+ "Invalid variadic node");
+ // Copy all of the variadic operands, not including a potential glue
+ // input.
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+ i != e; ++i) {
+ SDValue V = NodeToMatch->getOperand(i);
+ if (V.getValueType() == MVT::Glue) break;
+ Ops.push_back(V);
+ }
+ }
+
+ // If this has chain/glue inputs, add them.
+ if (EmitNodeInfo & OPFL_Chain)
+ Ops.push_back(InputChain);
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr)
+ Ops.push_back(InputGlue);
+
+ // Create the node.
+ MachineSDNode *Res = nullptr;
+ bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo ||
+ (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2);
+ if (!IsMorphNodeTo) {
+ // If this is a normal EmitNode command, just create the new node and
+ // add the results to the RecordedNodes list.
+ Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch),
+ VTList, Ops);
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ nullptr));
+ }
+ } else {
+ assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE &&
+ "NodeToMatch was removed partway through selection");
+ SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N,
+ SDNode *E) {
+ CurDAG->salvageDebugInfo(*N);
+ auto &Chain = ChainNodesMatched;
+ assert((!E || !is_contained(Chain, N)) &&
+ "Chain node replaced during MorphNode");
+ Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end());
+ });
+ Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList,
+ Ops, EmitNodeInfo));
+ }
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
+ if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-2);
+ } else if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-1);
+
+ // If the OPFL_MemRefs glue is set on this node, slap all of the
+ // accumulated memrefs onto it.
+ //
+ // FIXME: This is vastly incorrect for patterns with multiple output
+ // instructions that access memory and for ComplexPatterns that match
+ // loads.
+ if (EmitNodeInfo & OPFL_MemRefs) {
+ // Only attach load or store memory operands if the generated
+ // instruction may load or store.
+ const MCInstrDesc &MCID = TII->get(TargetOpc);
+ bool mayLoad = MCID.mayLoad();
+ bool mayStore = MCID.mayStore();
+
+ // We expect to have relatively few of these so just filter them into a
+ // temporary buffer so that we can easily add them to the instruction.
+ SmallVector<MachineMemOperand *, 4> FilteredMemRefs;
+ for (MachineMemOperand *MMO : MatchedMemRefs) {
+ if (MMO->isLoad()) {
+ if (mayLoad)
+ FilteredMemRefs.push_back(MMO);
+ } else if (MMO->isStore()) {
+ if (mayStore)
+ FilteredMemRefs.push_back(MMO);
+ } else {
+ FilteredMemRefs.push_back(MMO);
+ }
+ }
+
+ CurDAG->setNodeMemRefs(Res, FilteredMemRefs);
+ }
+
+ LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs()
+ << " Dropping mem operands\n";
+ dbgs() << " " << (IsMorphNodeTo ? "Morphed" : "Created")
+ << " node: ";
+ Res->dump(CurDAG););
+
+ // If this was a MorphNodeTo then we're completely done!
+ if (IsMorphNodeTo) {
+ // Update chain uses.
+ UpdateChains(Res, InputChain, ChainNodesMatched, true);
+ return;
+ }
+ continue;
+ }
+
+ case OPC_CompleteMatch: {
+ // The match has been completed, and any new nodes (if any) have been
+ // created. Patch up references to the matched dag to use the newly
+ // created nodes.
+ unsigned NumResults = MatcherTable[MatcherIndex++];
+
+ for (unsigned i = 0; i != NumResults; ++i) {
+ unsigned ResSlot = MatcherTable[MatcherIndex++];
+ if (ResSlot & 128)
+ ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+
+ assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
+ SDValue Res = RecordedNodes[ResSlot].first;
+
+ assert(i < NodeToMatch->getNumValues() &&
+ NodeToMatch->getValueType(i) != MVT::Other &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
+ "Invalid number of results to complete!");
+ assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+ NodeToMatch->getValueType(i) == MVT::iPTR ||
+ Res.getValueType() == MVT::iPTR ||
+ NodeToMatch->getValueType(i).getSizeInBits() ==
+ Res.getValueSizeInBits()) &&
+ "invalid replacement");
+ ReplaceUses(SDValue(NodeToMatch, i), Res);
+ }
+
+ // Update chain uses.
+ UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false);
+
+ // If the root node defines glue, we need to update it to the glue result.
+      // TODO: This never happens in our tests and I think it can be removed /
+      // replaced with an assert, but if we do it this way the change is NFC.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) ==
+ MVT::Glue &&
+ InputGlue.getNode())
+ ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1),
+ InputGlue);
+
+ assert(NodeToMatch->use_empty() &&
+ "Didn't replace all uses of the node?");
+ CurDAG->RemoveDeadNode(NodeToMatch);
+
+ return;
+ }
+ }
+
+ // If the code reached this point, then the match failed. See if there is
+ // another child to try in the current 'Scope', otherwise pop it until we
+ // find a case to check.
+ LLVM_DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex
+ << "\n");
+ ++NumDAGIselRetries;
+ while (true) {
+ if (MatchScopes.empty()) {
+ CannotYetSelect(NodeToMatch);
+ return;
+ }
+
+ // Restore the interpreter state back to the point where the scope was
+ // formed.
+ MatchScope &LastScope = MatchScopes.back();
+ RecordedNodes.resize(LastScope.NumRecordedNodes);
+ NodeStack.clear();
+ NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+ N = NodeStack.back();
+
+ if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+ MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+ MatcherIndex = LastScope.FailIndex;
+
+ LLVM_DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n");
+
+ InputChain = LastScope.InputChain;
+ InputGlue = LastScope.InputGlue;
+ if (!LastScope.HasChainNodesMatched)
+ ChainNodesMatched.clear();
+
+ // Check to see what the offset is at the new MatcherIndex. If it is zero
+ // we have reached the end of this scope, otherwise we have another child
+ // in the current scope to try.
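+      // Sketch of the layout being decoded: OPC_Scope is followed by a list
+      // of (VBR-encoded size, child matcher bytes) pairs terminated by a
+      // size of zero, i.e. OPC_Scope, <size1>, <child1...>, <size2>,
+      // <child2...>, 0.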
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+ // If we have another child in this scope to match, update FailIndex and
+ // try it.
+ if (NumToSkip != 0) {
+ LastScope.FailIndex = MatcherIndex+NumToSkip;
+ break;
+ }
+
+ // End of this scope, pop it and try the next child in the containing
+ // scope.
+ MatchScopes.pop_back();
+ }
+ }
+}
+
+bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
+ assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
+ auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return false;
+
+ // Detect when "or" is used to add an offset to a stack object.
+ if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ unsigned A = MFI.getObjectAlignment(FN->getIndex());
+ assert(isPowerOf2_32(A) && "Unexpected alignment");
+ int32_t Off = C->getSExtValue();
+ // If the alleged offset fits in the zero bits guaranteed by
+ // the alignment, then this or is really an add.
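+    // For example, a frame object with 16-byte alignment has its low four
+    // address bits known to be zero, so "or FI, 4" computes the same value
+    // as "add FI, 4".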
+ return (Off >= 0) && (((A - 1) & Off) == unsigned(Off));
+ }
+ return false;
+}
+
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot select: ";
+
+ if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_VOID) {
+ N->printrFull(Msg, CurDAG);
+ Msg << "\nIn function: " << MF->getName();
+ } else {
+ bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+ unsigned iid =
+ cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+ if (iid < Intrinsic::num_intrinsics)
+ Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid, None);
+ else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+ Msg << "target intrinsic %" << TII->getName(iid);
+ else
+ Msg << "unknown intrinsic #" << iid;
+ }
+ report_fatal_error(Msg.str());
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 0000000000000..cdc09d59f6a49
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,304 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "dag-printer"
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static bool hasEdgeDestLabels() {
+ return true;
+ }
+
+ static unsigned numEdgeDestLabels(const void *Node) {
+ return ((const SDNode *) Node)->getNumValues();
+ }
+
+ static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ return ((const SDNode *) Node)->getValueType(i).getEVTString();
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+ return itostr(I - SDNodeIterator::begin((const SDNode *) Node));
+ }
+
+ /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+ /// should actually target another edge source, not a node. If this method
+ /// is implemented, getEdgeTarget should be implemented.
+ template<typename EdgeIter>
+ static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ return true;
+ }
+
+ /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+ /// called to determine which outgoing edge of Node is the target of this
+ /// edge.
+ template<typename EdgeIter>
+ static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ SDNode *TargetNode = *I;
+ SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+ std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+ return NI;
+ }
+
+ static std::string getGraphName(const SelectionDAG *G) {
+ return G->getMachineFunction().getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static std::string getNodeIdentifierLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+#ifndef NDEBUG
+ OS << 't' << Node->PersistentId;
+#else
+ OS << static_cast<const void *>(Node);
+#endif
+ return R;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+ const SelectionDAG *Graph) {
+ SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+ EVT VT = Op.getValueType();
+ if (VT == MVT::Glue)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getSimpleNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Result = Node->getOperationName(G);
+ {
+ raw_string_ostream OS(Result);
+ Node->print_details(OS, G);
+ }
+ return Result;
+ }
+ std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().getNode())
+ GW.emitEdge(nullptr, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ "color=blue,style=dashed");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getName(),
+ false, Title);
+#else
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+ viewGraph("");
+}
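+// An illustrative debugger session (assuming a live SelectionDAG* named
+// CurDAG, a debug build, and Graphviz/gv installed):
+//   (gdb) call CurDAG->viewGraph()
+//   (gdb) call CurDAG->viewGraph("after-legalize")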
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (eg. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+ NodeGraphAttrs.clear();
+#else
+ errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = Attrs;
+#else
+ errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+ return std::string();
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ errs() << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor. Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color,
+                                          DenseSet<SDNode *> &visited,
+                                          int level, bool &printed) {
+ bool hit_limit = false;
+
+#ifndef NDEBUG
+ if (level >= 20) {
+ if (!printed) {
+ printed = true;
+ LLVM_DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+ }
+ return true;
+ }
+
+ unsigned oldSize = visited.size();
+ visited.insert(N);
+ if (visited.size() != oldSize) {
+ setGraphColor(N, Color);
+    for (SDNodeIterator i = SDNodeIterator::begin(N),
+                        iend = SDNodeIterator::end(N);
+         i != iend; ++i) {
+      hit_limit =
+          setSubgraphColorHelper(*i, Color, visited, level + 1, printed) ||
+          hit_limit;
+ }
+ }
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+ return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ DenseSet<SDNode *> visited;
+ bool printed = false;
+ if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+ // Visually mark that we hit the limit
+ if (strcmp(Color, "red") == 0) {
+ setSubgraphColorHelper(N, "blue", visited, 0, printed);
+ } else if (strcmp(Color, "yellow") == 0) {
+ setSubgraphColorHelper(N, "green", visited, 0, printed);
+ }
+ }
+
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream O(s);
+ O << "SU(" << SU->NodeNum << "): ";
+ if (SU->getNode()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ O << DOTGraphTraits<SelectionDAG*>
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
+ O << "\n ";
+ }
+ } else {
+ O << "CROSS RC COPY";
+ }
+ return O.str();
+}
+
+void ScheduleDAGSDNodes::getCustomGraphFeatures(
+    GraphWriter<ScheduleDAG*> &GW) const {
+ if (DAG) {
+ // Draw a special "GraphRoot" node to indicate the root of the graph.
+ GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot");
+ const SDNode *N = DAG->getRoot().getNode();
+ if (N && N->getNodeId() != -1)
+ GW.emitEdge(nullptr, -1, &SUnits[N->getNodeId()], -1,
+ "color=blue,style=dashed");
+ }
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
new file mode 100644
index 0000000000000..3a2df6f60593a
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -0,0 +1,17 @@
+//===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGTargetInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+using namespace llvm;
+
+SelectionDAGTargetInfo::~SelectionDAGTargetInfo() = default;
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
new file mode 100644
index 0000000000000..fad98b6f50dc1
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -0,0 +1,1067 @@
+//===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "StatepointLowering.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "statepoint-lowering"
+
+STATISTIC(NumSlotsAllocatedForStatepoints,
+ "Number of stack slots allocated for statepoints");
+STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
+STATISTIC(StatepointMaxSlotsRequired,
+ "Maximum number of stack slots required for a singe statepoint");
+
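+// For example, pushStackMapConstant(Ops, Builder, 3) appends the two target
+// constants <StackMaps::ConstantOp, 3>; the ConstantOp marker is what tells
+// the StackMaps emitter to record the following operand as a literal
+// constant location rather than a register or stack slot.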
+static void pushStackMapConstant(SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder &Builder, uint64_t Value) {
+ SDLoc L = Builder.getCurSDLoc();
+ Ops.push_back(Builder.DAG.getTargetConstant(StackMaps::ConstantOp, L,
+ MVT::i64));
+ Ops.push_back(Builder.DAG.getTargetConstant(Value, L, MVT::i64));
+}
+
+void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
+ // Consistency check
+ assert(PendingGCRelocateCalls.empty() &&
+ "Trying to visit statepoint before finished processing previous one");
+ Locations.clear();
+ NextSlotToAllocate = 0;
+  // Need to resize this on each safepoint - we need the two to stay in sync
+  // and the clear patterns of a SelectionDAGBuilder have no relation to
+  // FunctionLoweringInfo. Also need to ensure used bits get cleared.
+ AllocatedStackSlots.clear();
+ AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
+}
+
+void StatepointLoweringState::clear() {
+ Locations.clear();
+ AllocatedStackSlots.clear();
+ assert(PendingGCRelocateCalls.empty() &&
+ "cleared before statepoint sequence completed");
+}
+
+SDValue
+StatepointLoweringState::allocateStackSlot(EVT ValueType,
+ SelectionDAGBuilder &Builder) {
+ NumSlotsAllocatedForStatepoints++;
+ MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+
+ unsigned SpillSize = ValueType.getStoreSize();
+ assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?");
+
+  // First look for a previously created stack slot which is not in
+  // use (accounting for the fact that arbitrary slots may already be
+  // reserved); otherwise create a new stack slot and use it.
+
+ const size_t NumSlots = AllocatedStackSlots.size();
+ assert(NextSlotToAllocate <= NumSlots && "Broken invariant");
+
+ assert(AllocatedStackSlots.size() ==
+ Builder.FuncInfo.StatepointStackSlots.size() &&
+ "Broken invariant");
+
+ for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) {
+ if (!AllocatedStackSlots.test(NextSlotToAllocate)) {
+ const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
+ if (MFI.getObjectSize(FI) == SpillSize) {
+ AllocatedStackSlots.set(NextSlotToAllocate);
+ // TODO: Is ValueType the right thing to use here?
+ return Builder.DAG.getFrameIndex(FI, ValueType);
+ }
+ }
+ }
+
+ // Couldn't find a free slot, so create a new one:
+
+ SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
+ const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ MFI.markAsStatepointSpillSlotObjectIndex(FI);
+
+ Builder.FuncInfo.StatepointStackSlots.push_back(FI);
+ AllocatedStackSlots.resize(AllocatedStackSlots.size()+1, true);
+ assert(AllocatedStackSlots.size() ==
+ Builder.FuncInfo.StatepointStackSlots.size() &&
+ "Broken invariant");
+
+ StatepointMaxSlotsRequired.updateMax(
+ Builder.FuncInfo.StatepointStackSlots.size());
+
+ return SpillSlot;
+}
+
+/// Utility function for reservePreviousStackSlotForValue. Tries to find
+/// the stack slot index to which we have spilled a value for previous
+/// statepoints. LookUpDepth specifies the maximum DFS depth this function
+/// is allowed to search.
+static Optional<int> findPreviousSpillSlot(const Value *Val,
+ SelectionDAGBuilder &Builder,
+ int LookUpDepth) {
+  // Cannot look any further - give up now
+ if (LookUpDepth <= 0)
+ return None;
+
+ // Spill location is known for gc relocates
+ if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
+ const auto &SpillMap =
+ Builder.FuncInfo.StatepointSpillMaps[Relocate->getStatepoint()];
+
+ auto It = SpillMap.find(Relocate->getDerivedPtr());
+ if (It == SpillMap.end())
+ return None;
+
+ return It->second;
+ }
+
+ // Look through bitcast instructions.
+ if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val))
+ return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1);
+
+ // Look through phi nodes
+ // All incoming values should have same known stack slot, otherwise result
+ // is unknown.
+ if (const PHINode *Phi = dyn_cast<PHINode>(Val)) {
+ Optional<int> MergedResult = None;
+
+ for (auto &IncomingValue : Phi->incoming_values()) {
+ Optional<int> SpillSlot =
+ findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
+ if (!SpillSlot.hasValue())
+ return None;
+
+ if (MergedResult.hasValue() && *MergedResult != *SpillSlot)
+ return None;
+
+ MergedResult = SpillSlot;
+ }
+ return MergedResult;
+ }
+
+ // TODO: We can do better for PHI nodes. In cases like this:
+ // ptr = phi(relocated_pointer, not_relocated_pointer)
+ // statepoint(ptr)
+ // We will return that stack slot for ptr is unknown. And later we might
+ // assign different stack slots for ptr and relocated_pointer. This limits
+ // llvm's ability to remove redundant stores.
+ // Unfortunately it's hard to accomplish in current infrastructure.
+  // We use this function to eliminate the spill store completely, while in
+  // the example above we would still need to emit the store, just to a
+  // special "preferred" location instead of an arbitrary one.
+
+ // TODO: handle simple updates. If a value is modified and the original
+ // value is no longer live, it would be nice to put the modified value in the
+ // same slot. This allows folding of the memory accesses for some
+ // instructions types (like an increment).
+ // statepoint (i)
+ // i1 = i+1
+ // statepoint (i1)
+ // However we need to be careful for cases like this:
+ // statepoint(i)
+ // i1 = i+1
+ // statepoint(i, i1)
+ // Here we want to reserve spill slot for 'i', but not for 'i+1'. If we just
+ // put handling of simple modifications in this function like it's done
+ // for bitcasts we might end up reserving i's slot for 'i+1' because order in
+ // which we visit values is unspecified.
+
+ // Don't know any information about this instruction
+ return None;
+}
+
+/// Try to find existing copies of the incoming values in stack slots used for
+/// statepoint spilling. If we can find a spill slot for the incoming value,
+/// mark that slot as allocated, and reuse the same slot for this safepoint.
+/// This helps to avoid series of loads and stores that only serve to reshuffle
+/// values on the stack between calls.
+static void reservePreviousStackSlotForValue(const Value *IncomingValue,
+ SelectionDAGBuilder &Builder) {
+ SDValue Incoming = Builder.getValue(IncomingValue);
+
+ if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
+ // We won't need to spill this, so no need to check for previously
+ // allocated stack slots
+ return;
+ }
+
+ SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
+ if (OldLocation.getNode())
+ // Duplicates in input
+ return;
+
+ const int LookUpDepth = 6;
+ Optional<int> Index =
+ findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
+ if (!Index.hasValue())
+ return;
+
+ const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots;
+
+ auto SlotIt = find(StatepointSlots, *Index);
+ assert(SlotIt != StatepointSlots.end() &&
+ "Value spilled to the unknown stack slot");
+
+ // This is one of our dedicated lowering slots
+ const int Offset = std::distance(StatepointSlots.begin(), SlotIt);
+ if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
+ // stack slot already assigned to someone else, can't use it!
+ // TODO: currently we reserve space for gc arguments after doing
+ // normal allocation for deopt arguments. We should reserve for
+ // _all_ deopt and gc arguments, then start allocating. This
+ // will prevent some moves being inserted when vm state changes,
+ // but gc state doesn't between two calls.
+ return;
+ }
+ // Reserve this stack slot
+ Builder.StatepointLowering.reserveStackSlot(Offset);
+
+ // Cache this slot so we find it when going through the normal
+ // assignment loop.
+ SDValue Loc =
+ Builder.DAG.getTargetFrameIndex(*Index, Builder.getFrameIndexTy());
+ Builder.StatepointLowering.setLocation(Incoming, Loc);
+}
+
+/// Remove any duplicates (as SDValues) from the derived pointer pairs. This
+/// is not required for correctness. Its purpose is to reduce the size of
+/// the StackMap section. It has no effect on the number of spill slots
+/// required or the actual lowering.
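+/// For instance, a derived pointer and a no-op bitcast of it lower to the
+/// same SDValue; the bitcast's entry is dropped here and recorded in
+/// SSM.DuplicateMap so a gc.relocate of the duplicate can still be resolved
+/// to the representative value's slot.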
+static void
+removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
+ SmallVectorImpl<const Value *> &Ptrs,
+ SmallVectorImpl<const GCRelocateInst *> &Relocs,
+ SelectionDAGBuilder &Builder,
+ FunctionLoweringInfo::StatepointSpillMap &SSM) {
+ DenseMap<SDValue, const Value *> Seen;
+
+ SmallVector<const Value *, 64> NewBases, NewPtrs;
+ SmallVector<const GCRelocateInst *, 64> NewRelocs;
+ for (size_t i = 0, e = Ptrs.size(); i < e; i++) {
+ SDValue SD = Builder.getValue(Ptrs[i]);
+ auto SeenIt = Seen.find(SD);
+
+ if (SeenIt == Seen.end()) {
+ // Only add non-duplicates
+ NewBases.push_back(Bases[i]);
+ NewPtrs.push_back(Ptrs[i]);
+ NewRelocs.push_back(Relocs[i]);
+ Seen[SD] = Ptrs[i];
+ } else {
+ // Duplicate pointer found, note in SSM and move on:
+ SSM.DuplicateMap[Ptrs[i]] = SeenIt->second;
+ }
+ }
+ assert(Bases.size() >= NewBases.size());
+ assert(Ptrs.size() >= NewPtrs.size());
+ assert(Relocs.size() >= NewRelocs.size());
+ Bases = NewBases;
+ Ptrs = NewPtrs;
+ Relocs = NewRelocs;
+ assert(Ptrs.size() == Bases.size());
+ assert(Ptrs.size() == Relocs.size());
+}
+
+/// Extract the call from the statepoint, lower it and return a pointer to
+/// the call node. Also update NodeMap so that getValue(statepoint) will
+/// reference the lowered call result.
+static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
+ SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) {
+ SDValue ReturnValue, CallEndVal;
+ std::tie(ReturnValue, CallEndVal) =
+ Builder.lowerInvokable(SI.CLI, SI.EHPadBB);
+ SDNode *CallEnd = CallEndVal.getNode();
+
+ // Get a call instruction from the call sequence chain. Tail calls are not
+ // allowed. The following code is essentially reverse engineering X86's
+ // LowerCallTo.
+ //
+ // We are expecting DAG to have the following form:
+ //
+ // ch = eh_label (only in case of invoke statepoint)
+ // ch, glue = callseq_start ch
+ // ch, glue = X86::Call ch, glue
+ // ch, glue = callseq_end ch, glue
+ // get_return_value ch, glue
+ //
+ // get_return_value can either be a sequence of CopyFromReg instructions
+ // to grab the return value from the return register(s), or it can be a LOAD
+ // to load a value returned by reference via a stack slot.
+
+ bool HasDef = !SI.CLI.RetTy->isVoidTy();
+ if (HasDef) {
+ if (CallEnd->getOpcode() == ISD::LOAD)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ else
+ while (CallEnd->getOpcode() == ISD::CopyFromReg)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ }
+
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!");
+ return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode());
+}
+
+static MachineMemOperand* getMachineMemOperand(MachineFunction &MF,
+ FrameIndexSDNode &FI) {
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI.getIndex());
+ auto MMOFlags = MachineMemOperand::MOStore |
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
+ auto &MFI = MF.getFrameInfo();
+ return MF.getMachineMemOperand(PtrInfo, MMOFlags,
+ MFI.getObjectSize(FI.getIndex()),
+ MFI.getObjectAlignment(FI.getIndex()));
+}
+
+/// Spill a value incoming to the statepoint. It might be either part of the
+/// vmstate or the gcstate. In both cases unconditionally spill it on the
+/// stack unless it is a null constant. Return a tuple of the frame index
+/// containing the saved value, the outgoing chain from the emitted store,
+/// and the memory operand for the slot (null if no new store was emitted).
+static std::tuple<SDValue, SDValue, MachineMemOperand*>
+spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
+ SelectionDAGBuilder &Builder) {
+ SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+ MachineMemOperand* MMO = nullptr;
+
+ // Emit new store if we didn't do it for this ptr before
+ if (!Loc.getNode()) {
+ Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(),
+ Builder);
+ int Index = cast<FrameIndexSDNode>(Loc)->getIndex();
+ // We use TargetFrameIndex so that isel will not select it into LEA
+ Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
+
+ // Right now we always allocate spill slots that are of the same
+ // size as the value we're about to spill (the size of spillee can
+ // vary since we spill vectors of pointers too). At some point we
+ // can consider allowing spills of smaller values to larger slots
+ // (i.e. change the '==' in the assert below to a '>=').
+ MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() &&
+ "Bad spill: stack slot does not match!");
+
+    // Note: Using the alignment of the spill slot (rather than the abi or
+    // preferred alignment) is required for correctness when dealing with
+    // spill slots with preferred alignments larger than frame alignment.
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *StoreMMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlignment(Index));
+ Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
+ StoreMMO);
+
+ MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
+
+ Builder.StatepointLowering.setLocation(Incoming, Loc);
+ }
+
+ assert(Loc.getNode());
+ return std::make_tuple(Loc, Chain, MMO);
+}
+
+/// Lower a single value incoming to a statepoint node. This value can be
+/// either a deopt value or a gc value, the handling is the same. We special
+/// case constants and allocas, then fall back to spilling if required.
+static void
+lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
+                             SmallVectorImpl<SDValue> &Ops,
+                             SmallVectorImpl<MachineMemOperand *> &MemRefs,
+                             SelectionDAGBuilder &Builder) {
+ // Note: We know all of these spills are independent, but don't bother to
+ // exploit that chain wise. DAGCombine will happily do so as needed, so
+ // doing it here would be a small compile time win at most.
+ SDValue Chain = Builder.getRoot();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
+ // If the original value was a constant, make sure it gets recorded as
+ // such in the stackmap. This is required so that the consumer can
+ // parse any internal format to the deopt state. It also handles null
+ // pointers and other constant pointers in GC states. Note the constant
+ // vectors do not appear to actually hit this path and that anything larger
+ // than an i64 value (not type!) will fail asserts here.
+ pushStackMapConstant(Ops, Builder, C->getSExtValue());
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint (this is only
+ // really meaningful for a deopt value. For GC, we'd be trying to
+ // relocate the address of the alloca itself?)
+ assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
+ "Incoming value is a frame index!");
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Builder.getFrameIndexTy()));
+
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto *MMO = getMachineMemOperand(MF, *FI);
+ MemRefs.push_back(MMO);
+
+ } else if (LiveInOnly) {
+ // If this value is live in (not live-on-return, or live-through), we can
+    // treat it the same way patchpoint treats its "live in" values. We'll
+ // end up folding some of these into stack references, but they'll be
+ // handled by the register allocator. Note that we do not have the notion
+ // of a late use so these values might be placed in registers which are
+ // clobbered by the call. This is fine for live-in.
+ Ops.push_back(Incoming);
+ } else {
+ // Otherwise, locate a spill slot and explicitly spill it so it
+ // can be found by the runtime later. We currently do not support
+ // tracking values through callee saved registers to their eventual
+ // spill location. This would be a useful optimization, but would
+ // need to be optional since it requires a lot of complexity on the
+ // runtime side which not all would support.
+ auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
+ Ops.push_back(std::get<0>(Res));
+ if (auto *MMO = std::get<2>(Res))
+ MemRefs.push_back(MMO);
+ Chain = std::get<1>(Res);;
+ }
+
+ Builder.DAG.setRoot(Chain);
+}
+
+/// Lower deopt state and gc pointer arguments of the statepoint. The actual
+/// lowering is described in lowerIncomingStatepointValue. This function is
+/// responsible for lowering everything in the right position and playing some
+/// tricks to avoid redundant stack manipulation where possible. On
+/// completion, 'Ops' will contain ready to use operands for machine code
+/// statepoint. The chain nodes will have already been created and the DAG root
+/// will be set to the last value spilled (if any were).
+static void
+lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
+ SmallVectorImpl<MachineMemOperand*> &MemRefs, SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SelectionDAGBuilder &Builder) {
+ // Lower the deopt and gc arguments for this statepoint. Layout will be:
+ // deopt argument length, deopt arguments.., gc arguments...
+#ifndef NDEBUG
+ if (auto *GFI = Builder.GFI) {
+ // Check that each of the gc pointer and bases we've gotten out of the
+ // safepoint is something the strategy thinks might be a pointer (or vector
+ // of pointers) into the GC heap. This is basically just here to help catch
+ // errors during statepoint insertion. TODO: This should actually be in the
+ // Verifier, but we can't get to the GCStrategy from there (yet).
+ GCStrategy &S = GFI->getStrategy();
+ for (const Value *V : SI.Bases) {
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed base pointer found in statepoint");
+ }
+ }
+ for (const Value *V : SI.Ptrs) {
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed derived pointer found in statepoint");
+ }
+ }
+ assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!");
+ } else {
+ assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!");
+ assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!");
+ }
+#endif
+
+ // Figure out what lowering strategy we're going to use for each part
+  // Note: It is conservatively correct to lower both "live-in" and "live-out"
+ // as "live-through". A "live-through" variable is one which is "live-in",
+ // "live-out", and live throughout the lifetime of the call (i.e. we can find
+ // it from any PC within the transitive callee of the statepoint). In
+ // particular, if the callee spills callee preserved registers we may not
+ // be able to find a value placed in that register during the call. This is
+ // fine for live-out, but not for live-through. If we were willing to make
+ // assumptions about the code generator producing the callee, we could
+ // potentially allow live-through values in callee saved registers.
+ const bool LiveInDeopt =
+ SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
+
+ auto isGCValue =[&](const Value *V) {
+ return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V);
+ };
+
+ // Before we actually start lowering (and allocating spill slots for values),
+ // reserve any stack slots which we judge to be profitable to reuse for a
+ // particular value. This is purely an optimization over the code below and
+ // doesn't change semantics at all. It is important for performance that we
+ // reserve slots for both deopt and gc values before lowering either.
+ for (const Value *V : SI.DeoptState) {
+ if (!LiveInDeopt || isGCValue(V))
+ reservePreviousStackSlotForValue(V, Builder);
+ }
+ for (unsigned i = 0; i < SI.Bases.size(); ++i) {
+ reservePreviousStackSlotForValue(SI.Bases[i], Builder);
+ reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
+ }
+
+ // First, prefix the list with the number of unique values to be
+ // lowered. Note that this is the number of *Values* not the
+ // number of SDValues required to lower them.
+ const int NumVMSArgs = SI.DeoptState.size();
+ pushStackMapConstant(Ops, Builder, NumVMSArgs);
+
+ // The vm state arguments are lowered in an opaque manner. We do not know
+ // what type of values are contained within.
+ for (const Value *V : SI.DeoptState) {
+ SDValue Incoming;
+ // If this is a function argument at a static frame index, generate it as
+ // the frame index.
+ if (const Argument *Arg = dyn_cast<Argument>(V)) {
+ int FI = Builder.FuncInfo.getArgumentFrameIndex(Arg);
+ if (FI != INT_MAX)
+ Incoming = Builder.DAG.getFrameIndex(FI, Builder.getFrameIndexTy());
+ }
+ if (!Incoming.getNode())
+ Incoming = Builder.getValue(V);
+ const bool LiveInValue = LiveInDeopt && !isGCValue(V);
+ lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder);
+ }
+
+ // Finally, go ahead and lower all the gc arguments. There's no prefixed
+ // length for this one. After lowering, we'll have the base and pointer
+  // arrays interwoven with each (lowered) base pointer immediately followed
+  // by its (lowered) derived pointer, i.e.
+ // (base[0], ptr[0], base[1], ptr[1], ...)
+ for (unsigned i = 0; i < SI.Bases.size(); ++i) {
+ const Value *Base = SI.Bases[i];
+ lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
+ Ops, MemRefs, Builder);
+
+ const Value *Ptr = SI.Ptrs[i];
+ lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
+ Ops, MemRefs, Builder);
+ }
+
+ // If there are any explicit spill slots passed to the statepoint, record
+ // them, but otherwise do not do anything special. These are user provided
+ // allocas and give control over placement to the consumer. In this case,
+ // it is the contents of the slot which may get updated, not the pointer to
+ // the alloca
+ for (Value *V : SI.GCArgs) {
+ SDValue Incoming = Builder.getValue(V);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint
+ assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
+ "Incoming value is a frame index!");
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Builder.getFrameIndexTy()));
+
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto *MMO = getMachineMemOperand(MF, *FI);
+ MemRefs.push_back(MMO);
+ }
+ }
+
+ // Record computed locations for all lowered values.
+ // This can not be embedded in lowering loops as we need to record *all*
+ // values, while previous loops account only values with unique SDValues.
+ const Instruction *StatepointInstr = SI.StatepointInstr;
+ auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr];
+
+ for (const GCRelocateInst *Relocate : SI.GCRelocates) {
+ const Value *V = Relocate->getDerivedPtr();
+ SDValue SDV = Builder.getValue(V);
+ SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
+
+ if (Loc.getNode()) {
+ SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
+ } else {
+      // Record the value as visited, but not spilled. This is the case for
+      // allocas and constants. For these values we can avoid emitting a
+      // spill load when visiting the corresponding gc_relocate.
+      // Strictly speaking we do not need to record them in this map at all;
+      // we do so only to check that we are not relocating any unvisited
+      // value.
+ SpillMap.SlotMap[V] = None;
+
+      // The default llvm mechanism for exporting values which are used in
+      // different basic blocks does not work for gc relocates.
+      // Note that it would be incorrect to teach llvm that all relocates are
+      // uses of the corresponding values so that it would automatically
+      // export them. Relocates of spilled values do not use the original
+      // value.
+ if (Relocate->getParent() != StatepointInstr->getParent())
+ Builder.ExportFromCurrentBlock(V);
+ }
+ }
+}
+
+SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
+ SelectionDAGBuilder::StatepointLoweringInfo &SI) {
+ // The basic scheme here is that information about both the original call and
+ // the safepoint is encoded in the CallInst. We create a temporary call and
+ // lower it, then reverse engineer the calling sequence.
+
+ NumOfStatepoints++;
+ // Clear state
+ StatepointLowering.startNewStatepoint(*this);
+
+#ifndef NDEBUG
+ // We schedule gc relocates before removeDuplicateGCPtrs since we _will_
+ // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs.
+ for (auto *Reloc : SI.GCRelocates)
+ if (Reloc->getParent() == SI.StatepointInstr->getParent())
+ StatepointLowering.scheduleRelocCall(*Reloc);
+#endif
+
+ // Remove any redundant llvm::Values which map to the same SDValue as another
+ // input. Also has the effect of removing duplicates in the original
+ // llvm::Value input list as well. This is a useful optimization for
+ // reducing the size of the StackMap section. It has no other impact.
+ removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this,
+ FuncInfo.StatepointSpillMaps[SI.StatepointInstr]);
+ assert(SI.Bases.size() == SI.Ptrs.size() &&
+ SI.Ptrs.size() == SI.GCRelocates.size());
+
+ // Lower statepoint vmstate and gcstate arguments
+ SmallVector<SDValue, 10> LoweredMetaArgs;
+ SmallVector<MachineMemOperand*, 16> MemRefs;
+ lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this);
+
+ // Now that we've emitted the spills, we need to update the root so that the
+ // call sequence is ordered correctly.
+ SI.CLI.setChain(getRoot());
+
+ // Get call node, we will replace it later with statepoint
+ SDValue ReturnVal;
+ SDNode *CallNode;
+ std::tie(ReturnVal, CallNode) =
+ lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports);
+
+ // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
+ // nodes with all the appropriate arguments and return values.
+
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ SDValue Chain = CallNode->getOperand(0);
+
+ SDValue Glue;
+ bool CallHasIncomingGlue = CallNode->getGluedNode();
+ if (CallHasIncomingGlue) {
+ // Glue is always last operand
+ Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
+ }
+
+ // Build the GC_TRANSITION_START node if necessary.
+ //
+ // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the
+ // order in which they appear in the call to the statepoint intrinsic. If
+  // any of the operands is pointer-typed, that operand is immediately
+ // followed by a SRCVALUE for the pointer that may be used during lowering
+ // (e.g. to form MachinePointerInfo values for loads/stores).
+ const bool IsGCTransition =
+ (SI.StatepointFlags & (uint64_t)StatepointFlags::GCTransition) ==
+ (uint64_t)StatepointFlags::GCTransition;
+ if (IsGCTransition) {
+ SmallVector<SDValue, 8> TSOps;
+
+ // Add chain
+ TSOps.push_back(Chain);
+
+ // Add GC transition arguments
+ for (const Value *V : SI.GCTransitionArgs) {
+ TSOps.push_back(getValue(V));
+ if (V->getType()->isPointerTy())
+ TSOps.push_back(DAG.getSrcValue(V));
+ }
+
+ // Add glue if necessary
+ if (CallHasIncomingGlue)
+ TSOps.push_back(Glue);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ SDValue GCTransitionStart =
+ DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps);
+
+ Chain = GCTransitionStart.getValue(0);
+ Glue = GCTransitionStart.getValue(1);
+ }
+
+ // TODO: Currently, all of these operands are being marked as read/write in
+  // PrologEpilogInserter.cpp; we should special case the VMState arguments
+ // and flags to be read-only.
+ SmallVector<SDValue, 40> Ops;
+
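+  // The operands are assembled below in the order the STATEPOINT pseudo
+  // expects them: <id>, <numBytes>, <numCallRegArgs>, the call target, the
+  // direct call arguments, stackmap constants for the calling convention
+  // and flags, the lowered vm/gc state, and finally the register mask,
+  // chain and optional glue.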
+ // Add the <id> and <numBytes> constants.
+ Ops.push_back(DAG.getTargetConstant(SI.ID, getCurSDLoc(), MVT::i64));
+ Ops.push_back(
+ DAG.getTargetConstant(SI.NumPatchBytes, getCurSDLoc(), MVT::i32));
+
+ // Calculate and push starting position of vmstate arguments
+ // Get number of arguments incoming directly into call node
+ unsigned NumCallRegArgs =
+ CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3);
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32));
+
+ // Add call target
+ SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0);
+ Ops.push_back(CallTarget);
+
+ // Add call arguments
+ // Get position of register mask in the call
+ SDNode::op_iterator RegMaskIt;
+ if (CallHasIncomingGlue)
+ RegMaskIt = CallNode->op_end() - 2;
+ else
+ RegMaskIt = CallNode->op_end() - 1;
+ Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);
+
+ // Add a constant argument for the calling convention
+ pushStackMapConstant(Ops, *this, SI.CLI.CallConv);
+
+ // Add a constant argument for the flags
+ uint64_t Flags = SI.StatepointFlags;
+ assert(((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) &&
+ "Unknown flag used");
+ pushStackMapConstant(Ops, *this, Flags);
+
+ // Insert all vmstate and gcstate arguments
+ Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());
+
+ // Add register mask from call node
+ Ops.push_back(*RegMaskIt);
+
+ // Add chain
+ Ops.push_back(Chain);
+
+ // Same for the glue, but we add it only if original call had it
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+
+ // Compute return values. Provide a glue output since we consume one as
+ // input. This allows someone else to chain off us as needed.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ MachineSDNode *StatepointMCNode =
+ DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
+ DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
+
+ SDNode *SinkNode = StatepointMCNode;
+
+ // Build the GC_TRANSITION_END node if necessary.
+ //
+ // See the comment above regarding GC_TRANSITION_START for the layout of
+ // the operands to the GC_TRANSITION_END node.
+ if (IsGCTransition) {
+ SmallVector<SDValue, 8> TEOps;
+
+ // Add chain
+ TEOps.push_back(SDValue(StatepointMCNode, 0));
+
+ // Add GC transition arguments
+ for (const Value *V : SI.GCTransitionArgs) {
+ TEOps.push_back(getValue(V));
+ if (V->getType()->isPointerTy())
+ TEOps.push_back(DAG.getSrcValue(V));
+ }
+
+ // Add glue
+ TEOps.push_back(SDValue(StatepointMCNode, 1));
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+    SDValue GCTransitionEnd =
+        DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps);
+
+    SinkNode = GCTransitionEnd.getNode();
+ }
+
+ // Replace original call
+ DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
+ // Remove original call node
+ DAG.DeleteNode(CallNode);
+
+ // DON'T set the root - under the assumption that it's already set past the
+ // inserted node we created.
+
+ // TODO: A better future implementation would be to emit a single variable
+ // argument, variable return value STATEPOINT node here and then hookup the
+ // return value of each gc.relocate to the respective output of the
+ // previously emitted STATEPOINT value. Unfortunately, this doesn't appear
+ // to actually be possible today.
+
+ return ReturnVal;
+}
+
+void
+SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
+ const BasicBlock *EHPadBB /*= nullptr*/) {
+ assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg &&
+ "anyregcc is not supported on statepoints!");
+
+#ifndef NDEBUG
+ // If this is a malformed statepoint, report it early to simplify debugging.
+ // This should catch any IR level mistake that's made when constructing or
+ // transforming statepoints.
+ ISP.verify();
+
+ // Check that the associated GCStrategy expects to encounter statepoints.
+ assert(GFI->getStrategy().useStatepoints() &&
+ "GCStrategy does not expect to encounter statepoints");
+#endif
+
+ SDValue ActualCallee;
+
+ if (ISP.getNumPatchBytes() > 0) {
+ // If we've been asked to emit a nop sequence instead of a call instruction
+ // for this statepoint then don't lower the call target, but use a constant
+ // `null` instead. Not lowering the call target lets statepoint clients get
+ // away without providing a physical address for the symbolic call target at
+ // link time.
+
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ const auto &DL = DAG.getDataLayout();
+
+ unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace();
+ ActualCallee = DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DL, AS));
+ } else {
+ ActualCallee = getValue(ISP.getCalledValue());
+ }
+
+ StatepointLoweringInfo SI(DAG);
+ populateCallLoweringInfo(SI.CLI, ISP.getCall(),
+ ImmutableStatepoint::CallArgsBeginPos,
+ ISP.getNumCallArgs(), ActualCallee,
+ ISP.getActualReturnType(), false /* IsPatchPoint */);
+
+ for (const GCRelocateInst *Relocate : ISP.getRelocates()) {
+ SI.GCRelocates.push_back(Relocate);
+ SI.Bases.push_back(Relocate->getBasePtr());
+ SI.Ptrs.push_back(Relocate->getDerivedPtr());
+ }
+
+ SI.GCArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
+ SI.StatepointInstr = ISP.getInstruction();
+ SI.GCTransitionArgs =
+ ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
+ SI.ID = ISP.getID();
+ SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end());
+ SI.StatepointFlags = ISP.getFlags();
+ SI.NumPatchBytes = ISP.getNumPatchBytes();
+ SI.EHPadBB = EHPadBB;
+
+ SDValue ReturnValue = LowerAsSTATEPOINT(SI);
+
+ // Export the result value if needed
+ const GCResultInst *GCResult = ISP.getGCResult();
+ Type *RetTy = ISP.getActualReturnType();
+ if (!RetTy->isVoidTy() && GCResult) {
+ if (GCResult->getParent() != ISP.getCall()->getParent()) {
+ // Result value will be used in a different basic block so we need to
+ // export it now. Default exporting mechanism will not work here because
+ // statepoint call has a different type than the actual call. It means
+ // that by default llvm will create export register of the wrong type
+ // (always i32 in our case). So instead we need to create export register
+ // with correct type manually.
+ // TODO: To eliminate this problem we can remove gc.result intrinsics
+ // completely and make statepoint call to return a tuple.
+ unsigned Reg = FuncInfo.CreateRegs(RetTy);
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), Reg, RetTy,
+ ISP.getCall()->getCallingConv());
+ SDValue Chain = DAG.getEntryNode();
+
+ RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
+ PendingExports.push_back(Chain);
+ FuncInfo.ValueMap[ISP.getInstruction()] = Reg;
+ } else {
+      // Result value will be used in the same basic block. Don't export it
+      // or perform any explicit register copies.
+      // We'll replace the actual call node shortly. gc_result will grab
+ // this value.
+ setValue(ISP.getInstruction(), ReturnValue);
+ }
+ } else {
+ // The token value is never used from here on, just generate a poison value
+ setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc()));
+ }
+}
+
+void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
+ const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB,
+ bool VarArgDisallowed, bool ForceVoidReturnTy) {
+ StatepointLoweringInfo SI(DAG);
+ unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
+ populateCallLoweringInfo(
+ SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee,
+ ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
+ false);
+ if (!VarArgDisallowed)
+ SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg();
+
+ auto DeoptBundle = *Call->getOperandBundle(LLVMContext::OB_deopt);
+
+ unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;
+
+ auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes());
+ SI.ID = SD.StatepointID.getValueOr(DefaultID);
+ SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0);
+
+ SI.DeoptState =
+ ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end());
+ SI.StatepointFlags = static_cast<uint64_t>(StatepointFlags::None);
+ SI.EHPadBB = EHPadBB;
+
+ // NB! The GC arguments are deliberately left empty.
+
+ if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
+ ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal);
+ setValue(Call, ReturnVal);
+ }
+}
+
+void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
+ const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB) {
+ LowerCallSiteWithDeoptBundleImpl(Call, Callee, EHPadBB,
+ /* VarArgDisallowed = */ false,
+ /* ForceVoidReturnTy = */ false);
+}
+
+void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
+ // The result value of the gc_result is simply the result of the actual
+ // call. We've already emitted this, so just grab the value.
+ const Instruction *I = CI.getStatepoint();
+
+ if (I->getParent() != CI.getParent()) {
+ // Statepoint is in different basic block so we should have stored call
+ // result in a virtual register.
+ // We can not use default getValue() functionality to copy value from this
+ // register because statepoint and actual call return types can be
+ // different, and getValue() will use CopyFromReg of the wrong type,
+ // which is always i32 in our case.
+ PointerType *CalleeType = cast<PointerType>(
+ ImmutableStatepoint(I).getCalledValue()->getType());
+ Type *RetTy =
+ cast<FunctionType>(CalleeType->getElementType())->getReturnType();
+ SDValue CopyFromReg = getCopyFromRegs(I, RetTy);
+
+ assert(CopyFromReg.getNode());
+ setValue(&CI, CopyFromReg);
+ } else {
+ setValue(&CI, getValue(I));
+ }
+}
+
+void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
+#ifndef NDEBUG
+ // Consistency check
+ // We skip this check for relocates not in the same basic block as their
+ // statepoint. It would be too expensive to preserve validation info through
+ // different basic blocks.
+ if (Relocate.getStatepoint()->getParent() == Relocate.getParent())
+ StatepointLowering.relocCallVisited(Relocate);
+
+ auto *Ty = Relocate.getType()->getScalarType();
+ if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
+ assert(*IsManaged && "Non gc managed pointer relocated!");
+#endif
+
+ const Value *DerivedPtr = Relocate.getDerivedPtr();
+ SDValue SD = getValue(DerivedPtr);
+
+ auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()];
+ auto SlotIt = SpillMap.find(DerivedPtr);
+ assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value");
+ Optional<int> DerivedPtrLocation = SlotIt->second;
+
+ // We didn't need to spill these special cases (constants and allocas).
+ // See the handling in spillIncomingValueForStatepoint for detail.
+ if (!DerivedPtrLocation) {
+ setValue(&Relocate, SD);
+ return;
+ }
+
+ unsigned Index = *DerivedPtrLocation;
+ SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
+
+ // Note: We know all of these reloads are independent, but don't bother to
+  // exploit that chain-wise. DAGCombine will happily do so as needed, so
+  // doing it here would be a small compile-time win at most.
+ SDValue Chain = getRoot();
+
+ auto &MF = DAG.getMachineFunction();
+ auto &MFI = MF.getFrameInfo();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *LoadMMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlignment(Index));
+
+ auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ Relocate.getType());
+
+ SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain,
+ SpillSlot, LoadMMO);
+
+ DAG.setRoot(SpillLoad.getValue(1));
+
+ assert(SpillLoad.getNode());
+ setValue(&Relocate, SpillLoad);
+}
+
+void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::DEOPTIMIZE),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ // We don't lower calls to __llvm_deoptimize as varargs, but as a regular
+ // call. We also do not lower the return value to any virtual register, and
+ // change the immediately following return to a trap instruction.
+ LowerCallSiteWithDeoptBundleImpl(CI, Callee, /* EHPadBB = */ nullptr,
+ /* VarArgDisallowed = */ true,
+ /* ForceVoidReturnTy = */ true);
+}
+
+void SelectionDAGBuilder::LowerDeoptimizingReturn() {
+ // We do not lower the return value from llvm.deoptimize to any virtual
+ // register, and change the immediately following return to a trap
+ // instruction.
+ if (DAG.getTarget().Options.TrapUnreachable)
+ DAG.setRoot(
+ DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
new file mode 100644
index 0000000000000..70507932681d0
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -0,0 +1,128 @@
+//===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include <cassert>
+
+namespace llvm {
+
+class CallInst;
+class SelectionDAGBuilder;
+
+/// This class tracks both per-statepoint and per-selectiondag information.
+/// For each statepoint it tracks the locations of its gc values (incoming and
+/// relocated) and the list of gc relocate calls scheduled for visiting (this
+/// is used for a debug-mode consistency check only). The spill slot tracking
+/// works in concert with information in FunctionLoweringInfo.
+class StatepointLoweringState {
+public:
+ StatepointLoweringState() = default;
+
+ /// Reset all state tracking for a newly encountered safepoint. Also
+ /// performs some consistency checking.
+ void startNewStatepoint(SelectionDAGBuilder &Builder);
+
+ /// Clear the memory usage of this object. This is called from
+ /// SelectionDAGBuilder::clear. We require this is never called in the
+ /// midst of processing a statepoint sequence.
+ void clear();
+
+ /// Returns the spill location of a value incoming to the current
+ /// statepoint. Will return SDValue() if this value hasn't been
+ /// spilled. Otherwise, the value has already been spilled and no
+ /// further action is required by the caller.
+ SDValue getLocation(SDValue Val) {
+ auto I = Locations.find(Val);
+ if (I == Locations.end())
+ return SDValue();
+ return I->second;
+ }
+
+ void setLocation(SDValue Val, SDValue Location) {
+ assert(!Locations.count(Val) &&
+ "Trying to allocate already allocated location");
+ Locations[Val] = Location;
+ }
+
+ /// Record the fact that we expect to encounter a given gc_relocate
+ /// before the next statepoint. If we don't see it, we'll report
+  /// an assertion failure.
+ void scheduleRelocCall(const CallInst &RelocCall) {
+ // We are not interested in lowering dead instructions.
+ if (!RelocCall.use_empty())
+ PendingGCRelocateCalls.push_back(&RelocCall);
+ }
+
+ /// Remove this gc_relocate from the list we're expecting to see
+ /// before the next statepoint. If we weren't expecting to see
+  /// it, we'll report an assertion failure.
+ void relocCallVisited(const CallInst &RelocCall) {
+ // We are not interested in lowering dead instructions.
+ if (RelocCall.use_empty())
+ return;
+ auto I = llvm::find(PendingGCRelocateCalls, &RelocCall);
+ assert(I != PendingGCRelocateCalls.end() &&
+ "Visited unexpected gcrelocate call");
+ PendingGCRelocateCalls.erase(I);
+ }
+
+ // TODO: Should add consistency tracking to ensure we encounter
+ // expected gc_result calls too.
+
+  /// Get a stack slot we can use to store a value of type ValueType. This
+  /// will hopefully be a recycled slot from another statepoint.
+ SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder);
+
+ void reserveStackSlot(int Offset) {
+ assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+ "out of bounds");
+ assert(!AllocatedStackSlots.test(Offset) && "already reserved!");
+ assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
+ AllocatedStackSlots.set(Offset);
+ }
+
+ bool isStackSlotAllocated(int Offset) {
+ assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+ "out of bounds");
+ return AllocatedStackSlots.test(Offset);
+ }
+
+private:
+  /// Maps a pre-relocation value (a gc pointer incoming directly into the
+  /// statepoint) to its location (currently only stack slots).
+ DenseMap<SDValue, SDValue> Locations;
+
+ /// A boolean indicator for each slot listed in the FunctionInfo as to
+ /// whether it has been used in the current statepoint. Since we try to
+ /// preserve stack slots across safepoints, there can be gaps in which
+ /// slots have been allocated.
+ SmallBitVector AllocatedStackSlots;
+
+  /// Points just beyond the last slot known to have been allocated.
+ unsigned NextSlotToAllocate = 0;
+
+  /// Keep track of pending gc relocate calls for the consistency check.
+ SmallVector<const CallInst *, 10> PendingGCRelocateCalls;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 0000000000000..9ab1324533f1e
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,7368 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cctype>
+using namespace llvm;
+
+/// NOTE: The TargetMachine owns TLOF.
+TargetLowering::TargetLowering(const TargetMachine &tm)
+ : TargetLoweringBase(tm) {}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return nullptr;
+}
+
+bool TargetLowering::isPositionIndependent() const {
+ return getTargetMachine().isPositionIndependent();
+}
+
+/// Check whether a given call node is in tail position within its function. If
+/// so, it sets Chain to the input chain of the tail call.
+bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ SDValue &Chain) const {
+ const Function &F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore NoAlias and NonNull because they don't affect the
+ // call sequence.
+ AttributeList CallerAttrs = F.getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias)
+ .removeAttribute(Attribute::NonNull)
+ .hasAttributes())
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return isUsedByReturnOnly(Node, Chain);
+}
+
+bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
+ const uint32_t *CallerPreservedMask,
+ const SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<SDValue> &OutVals) const {
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ const CCValAssign &ArgLoc = ArgLocs[I];
+ if (!ArgLoc.isRegLoc())
+ continue;
+ Register Reg = ArgLoc.getLocReg();
+ // Only look at callee saved registers.
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
+ continue;
+ // Check that we pass the value used for the caller.
+ // (We look for a CopyFromReg reading a virtual register that is used
+ // for the function live-in value of register Reg)
+ SDValue Value = OutVals[I];
+ if (Value->getOpcode() != ISD::CopyFromReg)
+ return false;
+ unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
+ if (MRI.getLiveInPhysReg(ArgReg) != Reg)
+ return false;
+ }
+ return true;
+}
+
+/// Set CallLoweringInfo attribute flags based on a call instruction
+/// and called function attributes.
+void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
+ unsigned ArgIdx) {
+ IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
+ IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
+ IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
+ IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
+ IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
+ IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+ IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
+ IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
+ IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
+ IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
+ Alignment = Call->getParamAlignment(ArgIdx);
+ ByValType = nullptr;
+ if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+ ByValType = Call->getParamByValType(ArgIdx);
+}
+
+/// Generate a libcall taking the given operands as arguments and returning a
+/// result of type RetVT.
+std::pair<SDValue, SDValue>
+TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
+ ArrayRef<SDValue> Ops,
+ MakeLibCallOptions CallOptions,
+ const SDLoc &dl) const {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(Ops.size());
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i < Ops.size(); ++i) {
+ SDValue NewOp = Ops[i];
+ Entry.Node = NewOp;
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
+ CallOptions.IsSExt);
+ Entry.IsZExt = !Entry.IsSExt;
+
+ if (CallOptions.IsSoften &&
+ !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
+ Entry.IsSExt = Entry.IsZExt = false;
+ }
+ Args.push_back(Entry);
+ }
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported library call operation!");
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
+ bool zeroExtend = !signExtend;
+
+ if (CallOptions.IsSoften &&
+ !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
+ signExtend = zeroExtend = false;
+ }
+
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setNoReturn(CallOptions.DoesNotReturn)
+ .setDiscardResult(!CallOptions.IsReturnValueUsed)
+ .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
+ .setSExtResult(signExtend)
+ .setZExtResult(zeroExtend);
+ return LowerCallTo(CLI);
+}
+
+bool
+TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ bool AllowOverlap,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes) const {
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
+ if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
+ return false;
+
+ EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
+ IsMemset, ZeroMemset, MemcpyStrSrc,
+ FuncAttributes);
+
+ if (VT == MVT::Other) {
+ // Use the largest integer type whose alignment constraints are satisfied.
+    // We only need to check DstAlign here as SrcAlign is always greater than
+    // or equal to DstAlign (or zero).
+ VT = MVT::i64;
+ while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
+ !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ assert(VT.isInteger());
+
+ // Find the largest legal integer type.
+ MVT LVT = MVT::i64;
+ while (!isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+ assert(LVT.isInteger());
+
+ // If the type we've chosen is larger than the largest legal integer type
+ // then use that instead.
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+      // For now, only use non-vector loads / stores for the left-over pieces.
+ EVT NewVT = VT;
+ unsigned NewVTSize;
+
+ bool Found = false;
+ if (VT.isVector() || VT.isFloatingPoint()) {
+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+ if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+ isSafeMemOpType(NewVT.getSimpleVT()))
+ Found = true;
+ else if (NewVT == MVT::i64 &&
+ isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+ isSafeMemOpType(MVT::f64)) {
+ // i64 is usually not legal on 32-bit targets, but f64 may be.
+ NewVT = MVT::f64;
+ Found = true;
+ }
+ }
+
+ if (!Found) {
+ do {
+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+ if (NewVT == MVT::i8)
+ break;
+ } while (!isSafeMemOpType(NewVT.getSimpleVT()));
+ }
+ NewVTSize = NewVT.getSizeInBits() / 8;
+
+ // If the new VT cannot cover all of the remaining bits, then consider
+      // issuing one (or a pair of) unaligned and overlapping loads / stores.
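+      // Setting VTSize to the remaining Size keeps the wider VT for the final
+      // memory op, which will then overlap the tail of the previous op rather
+      // than being split into narrower ops.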
+ bool Fast;
+ if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+ allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
+ MachineMemOperand::MONone, &Fast) &&
+ Fast)
+ VTSize = Size;
+ else {
+ VT = NewVT;
+ VTSize = NewVTSize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
+
+/// Soften the operands of a comparison. This code is shared among BR_CC,
+/// SELECT_CC, and SETCC handlers.
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl, const SDValue OldLHS,
+ const SDValue OldRHS) const {
+ assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
+ && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ bool ShouldInvertCC = false;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 :
+ (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
+ (VT == MVT::f64) ? RTLIB::UNE_F64 :
+ (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 :
+ (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 :
+ (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 :
+ (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
+ (VT == MVT::f64) ? RTLIB::O_F64 :
+ (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
+ break;
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+ break;
+ case ISD::SETUEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 :
+ (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 :
+ (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
+ break;
+ default:
+ // Invert CC for unordered comparisons
+ ShouldInvertCC = true;
+ switch (CCCode) {
+ case ISD::SETULT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 :
+ (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
+ break;
+ case ISD::SETULE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+ break;
+ case ISD::SETUGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 :
+ (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
+ break;
+ case ISD::SETUGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+ break;
+ default: llvm_unreachable("Do not know how to soften this setcc!");
+ }
+ }
+
+  // Use the target-specific return value for comparison libcalls.
+ EVT RetVT = getCmpLibcallReturnType();
+ SDValue Ops[2] = {NewLHS, NewRHS};
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { OldLHS.getValueType(),
+ OldRHS.getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
+ NewRHS = DAG.getConstant(0, dl, RetVT);
+
+ CCCode = getCmpLibcallCC(LC1);
+ if (ShouldInvertCC)
+ CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
+
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
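+    // Two-libcall predicates (SETONE, SETUEQ): evaluate the first comparison
+    // into a setcc, run the second libcall, and OR the two setcc results.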
+ SDValue Tmp = DAG.getNode(
+ ISD::SETCC, dl,
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
+ NewLHS = DAG.getNode(
+ ISD::SETCC, dl,
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
+ }
+}
+
+/// Return the entry encoding for a jump table in the current function. The
+/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+ // In non-pic modes, just use the address of a block.
+ if (!isPositionIndependent())
+ return MachineJumpTableInfo::EK_BlockAddress;
+
+ // In PIC mode, if the target supports a GPRel32 directive, use it.
+ if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
+ return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+
+ // Otherwise, use a label difference.
+ return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ // If our PIC model is GP relative, use the global offset table as the base.
+ unsigned JTEncoding = getJumpTableEncoding();
+
+ if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+ (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
+
+ return Table;
+}
+
+/// This returns the relocation base for the given PIC jumptable, the same as
+/// getPICJumpTableRelocBase, but as an MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI,MCContext &Ctx) const{
+ // The normal PIC reloc base is the label at the start of the jump table.
+ return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ const TargetMachine &TM = getTargetMachine();
+ const GlobalValue *GV = GA->getGlobal();
+
+ // If the address is not even local to this DSO we will have to load it from
+ // a got and then add the offset.
+ if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return false;
+
+ // If the code is position independent we will have to add a base register.
+ if (isPositionIndependent())
+ return false;
+
+ // Otherwise we can do it.
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// If the specified instruction has a constant integer operand and there are
+/// bits set in that constant that are not demanded, then clear those bits and
+/// return true.
+bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ TargetLoweringOpt &TLO) const {
+ SDLoc DL(Op);
+ unsigned Opcode = Op.getOpcode();
+
+ // Do target-specific constant optimization.
+ if (targetShrinkDemandedConstant(Op, Demanded, TLO))
+ return TLO.New.getNode();
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Opcode) {
+ default:
+ break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Op1C)
+ return false;
+
+ // If this is a 'not' op, don't touch it because that's a canonical form.
+ const APInt &C = Op1C->getAPIntValue();
+ if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
+ return false;
+
+ if (!C.isSubsetOf(Demanded)) {
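+      // Clear the constant bits that are never demanded, e.g. for an i16
+      // (and X, 0xFF00) with only bits 8-11 demanded, 0xFF00 shrinks to
+      // 0x0F00.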
+ EVT VT = Op.getValueType();
+ SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
+ SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+/// This uses isZExtFree and ANY_EXTEND for the widening cast, but it could be
+/// generalized for targets with other types of implicit widening casts.
+bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
+ const APInt &Demanded,
+ TargetLoweringOpt &TLO) const {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ SelectionDAG &DAG = TLO.DAG;
+ SDLoc dl(Op);
+
+ // Early return, as this function cannot handle vector types.
+ if (Op.getValueType().isVector())
+ return false;
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned DemandedSize = Demanded.getActiveBits();
+ unsigned SmallVTBits = DemandedSize;
+ if (!isPowerOf2_32(SmallVTBits))
+ SmallVTBits = NextPowerOf2(SmallVTBits);
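+  // e.g. with BitWidth = 64 and only the low 20 bits demanded, this tries i32
+  // first (the next power of two covering 20 bits) and gives up once the
+  // candidate width reaches the full 64 bits.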
+ for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+ if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ // We found a type with free casts.
+ SDValue X = DAG.getNode(
+ Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
+ assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
+ SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
+ return TLO.CombineTo(Op, Z);
+ }
+ }
+ return false;
+}
+
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ KnownBits Known;
+
+ bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return Simplified;
+}
+
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth,
+ bool AssumeSingleUse) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
+ AssumeSingleUse);
+}
+
+// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
+// TODO: Under what circumstances can we create nodes? Constant folding?
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ // Limit search depth.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return SDValue();
+
+ // Ignore UNDEFs.
+ if (Op.isUndef())
+ return SDValue();
+
+ // Not demanding any bits/elts from Op.
+ if (DemandedBits == 0 || DemandedElts == 0)
+ return DAG.getUNDEF(Op.getValueType());
+
+ unsigned NumElts = DemandedElts.getBitWidth();
+ KnownBits LHSKnown, RHSKnown;
+ switch (Op.getOpcode()) {
+ case ISD::BITCAST: {
+ SDValue Src = peekThroughBitcasts(Op.getOperand(0));
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+ unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
+
+ if (NumSrcEltBits == NumDstEltBits)
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+
+ // TODO - bigendian once we have test coverage.
+ if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
+ DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumDstEltBits / NumSrcEltBits;
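+      // e.g. for a little-endian v4i32 -> v2i64 bitcast, Scale = 2: demanded
+      // i64 element j maps to source i32 elements 2*j and 2*j+1, limited to
+      // the 32-bit halves of DemandedBits that are actually set.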
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned Offset = i * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ if (!Sub.isNullValue()) {
+ DemandedSrcBits |= Sub;
+ for (unsigned j = 0; j != NumElts; ++j)
+ if (DemandedElts[j])
+ DemandedSrcElts.setBit((j * Scale) + i);
+ }
+ }
+
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+ }
+
+ // TODO - bigendian once we have test coverage.
+ if ((NumSrcEltBits % NumDstEltBits) == 0 &&
+ DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumSrcEltBits / NumDstEltBits;
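+      // e.g. for a v2i64 -> v4i32 bitcast, Scale = 2: demanded i32 element i
+      // demands bits [(i%2)*32, (i%2)*32 + 32) of source i64 element i/2.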
+ unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % Scale) * NumDstEltBits;
+ DemandedSrcBits.insertBits(DemandedBits, Offset);
+ DemandedSrcElts.setBit(i / Scale);
+ }
+
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+ }
+
+ break;
+ }
+ case ISD::AND: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::OR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::XOR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other.
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
+ return Op.getOperand(0);
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ // If we don't demand the inserted element, return the base vector.
+ SDValue Vec = Op.getOperand(0);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ EVT VecVT = Vec.getValueType();
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
+ !DemandedElts[CIdx->getZExtValue()])
+ return Vec;
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ // If all the demanded elts are from one operand and are inline,
+ // then we can use the operand directly.
+ bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0 || !DemandedElts[i])
+ continue;
+ AllUndef = false;
+ IdentityLHS &= (M == (int)i);
+ IdentityRHS &= ((M - NumElts) == i);
+ }
+
+ if (AllUndef)
+ return DAG.getUNDEF(Op.getValueType());
+ if (IdentityLHS)
+ return Op.getOperand(0);
+ if (IdentityRHS)
+ return Op.getOperand(1);
+ break;
+ }
+ default:
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
+ Op, DemandedBits, DemandedElts, DAG, Depth))
+ return V;
+ break;
+ }
+ return SDValue();
+}
+
+/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
+/// result of Op are ever used downstream. If we can use this information to
+/// simplify Op, create a new simplified DAG node and return true, returning the
+/// original and new nodes in Old and New. Otherwise, analyze the expression and
+/// return a mask of Known bits for the expression (used to simplify the
+/// caller). The Known bits may only be accurate for those bits in the
+/// OriginalDemandedBits and OriginalDemandedElts.
+bool TargetLowering::SimplifyDemandedBits(
+ SDValue Op, const APInt &OriginalDemandedBits,
+ const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+ unsigned Depth, bool AssumeSingleUse) const {
+ unsigned BitWidth = OriginalDemandedBits.getBitWidth();
+ assert(Op.getScalarValueSizeInBits() == BitWidth &&
+ "Mask size mismatches value type size!");
+
+ unsigned NumElts = OriginalDemandedElts.getBitWidth();
+ assert((!Op.getValueType().isVector() ||
+ NumElts == Op.getValueType().getVectorNumElements()) &&
+ "Unexpected vector size");
+
+ APInt DemandedBits = OriginalDemandedBits;
+ APInt DemandedElts = OriginalDemandedElts;
+ SDLoc dl(Op);
+ auto &DL = TLO.DAG.getDataLayout();
+
+ // Don't know anything.
+ Known = KnownBits(BitWidth);
+
+ // Undef operand.
+ if (Op.isUndef())
+ return false;
+
+ if (Op.getOpcode() == ISD::Constant) {
+ // We know all of the bits for a constant!
+ Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
+ Known.Zero = ~Known.One;
+ return false;
+ }
+
+ // Other users may use these bits.
+ EVT VT = Op.getValueType();
+ if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
+ if (Depth != 0) {
+      // If not at the root, just compute the Known bits to
+ // simplify things downstream.
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false;
+ }
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the DemandedBits/Elts to all bits.
+ DemandedBits = APInt::getAllOnesValue(BitWidth);
+ DemandedElts = APInt::getAllOnesValue(NumElts);
+ } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
+ // Not demanding any bits/elts from Op.
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
+ // Limit search depth.
+ return false;
+ }
+
+ KnownBits Known2, KnownOut;
+ switch (Op.getOpcode()) {
+ case ISD::TargetConstant:
+ llvm_unreachable("Can't simplify this node");
+ case ISD::SCALAR_TO_VECTOR: {
+ if (!DemandedElts[0])
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
+ KnownBits SrcKnown;
+ SDValue Src = Op.getOperand(0);
+ unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
+ APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
+ return true;
+ Known = SrcKnown.zextOrTrunc(BitWidth, false);
+ break;
+ }
+ case ISD::BUILD_VECTOR:
+ // Collect the known bits that are shared by every demanded element.
+ // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ if (getTargetConstantFromLoad(LD)) {
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false; // Don't fall through, will infinitely loop.
+ }
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Scl = Op.getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ EVT VecVT = Vec.getValueType();
+
+ // If index isn't constant, assume we need all vector elements AND the
+ // inserted element.
+ APInt DemandedVecElts(DemandedElts);
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
+ unsigned Idx = CIdx->getZExtValue();
+ DemandedVecElts.clearBit(Idx);
+
+ // Inserted element is not required.
+ if (!DemandedElts[Idx])
+ return TLO.CombineTo(Op, Vec);
+ }
+
+ KnownBits KnownScl;
+ unsigned NumSclBits = Scl.getScalarValueSizeInBits();
+ APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
+ if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
+ return true;
+
+ Known = KnownScl.zextOrTrunc(BitWidth, false);
+
+ KnownBits KnownVec;
+ if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
+ Depth + 1))
+ return true;
+
+ if (!!DemandedVecElts) {
+ Known.One &= KnownVec.One;
+ Known.Zero &= KnownVec.Zero;
+ }
+
+ return false;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Base = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+
+ // If index isn't constant, assume we need the original demanded base
+ // elements and ALL the inserted subvector elements.
+ APInt BaseElts = DemandedElts;
+ APInt SubElts = APInt::getAllOnesValue(NumSubElts);
+ if (isa<ConstantSDNode>(Op.getOperand(2))) {
+ const APInt &Idx = Op.getConstantOperandAPInt(2);
+ if (Idx.ule(NumElts - NumSubElts)) {
+ unsigned SubIdx = Idx.getZExtValue();
+ SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
+ BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
+ }
+ }
+
+ KnownBits KnownSub, KnownBase;
+ if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
+ Depth + 1))
+ return true;
+
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ if (!!SubElts) {
+ Known.One &= KnownSub.One;
+ Known.Zero &= KnownSub.Zero;
+ }
+ if (!!BaseElts) {
+ Known.One &= KnownBase.One;
+ Known.Zero &= KnownBase.Zero;
+ }
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ // If index isn't constant, assume we need all the source vector elements.
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ }
+ if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
+ return true;
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ EVT SubVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVecs = Op.getNumOperands();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ APInt DemandedSubElts =
+ DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ // Known bits are shared by every demanded subvector element.
+ if (!!DemandedSubElts) {
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ }
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+    // Collect demanded elements from the shuffle operands.
+ APInt DemandedLHS(NumElts, 0);
+ APInt DemandedRHS(NumElts, 0);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ int M = ShuffleMask[i];
+ if (M < 0) {
+ // For UNDEF elements, we don't know anything about the common state of
+ // the shuffle result.
+ DemandedLHS.clearAllBits();
+ DemandedRHS.clearAllBits();
+ break;
+ }
+ assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
+ }
+
+ if (!!DemandedLHS || !!DemandedRHS) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ if (!!DemandedLHS) {
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ if (!!DemandedRHS) {
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+ break;
+ }
+ case ISD::AND: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+    // simplify the LHS; here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
+ // Do not increment Depth here; that can cause an infinite loop.
+ KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
+ // If the LHS already has zeros where RHSC does, this 'and' is dead.
+ if ((LHSKnown.Zero & DemandedBits) ==
+ (~RHSC->getAPIntValue() & DemandedBits))
+ return TLO.CombineTo(Op, Op0);
+
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
+ return true;
+
+ // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
+ // constant, but if this 'and' is only clearing bits that were just set by
+ // the xor, then this 'and' can be eliminated by shrinking the mask of
+ // the xor. For example, for a 32-bit X:
+ // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
+ if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
+ LHSKnown.One == ~RHSC->getAPIntValue()) {
+ SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
+ return TLO.CombineTo(Op, Xor);
+ }
+ }
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
+ return TLO.CombineTo(Op, Op1);
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ Known.One &= Known2.One;
+    // Output known-0 bits are known clear if zero in either the LHS or RHS.
+ Known.Zero |= Known2.Zero;
+ break;
+ }
+ case ISD::OR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ Known.Zero &= Known2.Zero;
+    // Output known-1 bits are known set if set in either the LHS or RHS.
+ Known.One |= Known2.One;
+ break;
+ }
+ case ISD::XOR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if (DemandedBits.isSubsetOf(Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
+ // If the operation can be done in a smaller type, do so.
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+
+ // If all of the unknown bits are known to be zero on one side or the other
+    // (but not both), turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
+    // Output known-1 bits are set if set in exactly one of the LHS and RHS.
+ KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
+
+ if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
+ // If one side is a constant, and all of the known set bits on the other
+ // side are also set in the constant, turn this into an AND, as we know
+ // the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ // NB: it is okay if more bits are known than are requested
+ if (C->getAPIntValue() == Known2.One) {
+ SDValue ANDC =
+ TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
+ }
+
+ // If the RHS is a constant, see if we can change it. Don't alter a -1
+ // constant because that's a 'not' op, and that is better for combining
+ // and codegen.
+ if (!C->isAllOnesValue()) {
+ if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ // We're flipping all demanded bits. Flip the undemanded bits too.
+ SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
+ return TLO.CombineTo(Op, New);
+ }
+ // If we can't turn this into a 'not', try to shrink the constant.
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ return true;
+ }
+ }
+
+ Known = std::move(KnownOut);
+ break;
+ }
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ break;
+ case ISD::SETCC: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // If (1) we only need the sign-bit, (2) the setcc operands are the same
+ // width as the setcc result, and (3) the result of a setcc conforms to 0 or
+ // -1, we may be able to bypass the setcc.
+ if (DemandedBits.isSignMask() &&
+ Op0.getScalarValueSizeInBits() == BitWidth &&
+ getBooleanContents(VT) ==
+ BooleanContent::ZeroOrNegativeOneBooleanContent) {
+ // If we're testing X < 0, then this compare isn't needed - just use X!
+ // FIXME: We're limiting to integer types here, but this should also work
+ // if we don't care about FP signed-zero. The use of SETLT with FP means
+ // that we don't care about NaNs.
+ if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+ (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+ return TLO.CombineTo(Op, Op0);
+
+ // TODO: Should we check for other forms of sign-bit comparisons?
+ // Examples: X <= -1, X >= 0
+ }
+ if (getBooleanContents(Op0.getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ case ISD::SHL: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
+ // If the shift count is an invalid immediate, don't do anything.
+ if (SA->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ // TODO - support non-uniform vector amounts.
+ if (Op0.getOpcode() == ISD::SRL) {
+ if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
+ if (SA2->getAPIntValue().ult(BitWidth)) {
+ unsigned C1 = SA2->getZExtValue();
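+            // e.g. (shl (srl X, 3), 5) -> (shl X, 2), and
+            // (shl (srl X, 5), 3) -> (srl X, 2); valid because the low ShAmt
+            // bits of the result are known not to be demanded.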
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt - C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
+ }
+ }
+ }
+ }
+
+ if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+
+ // Try shrinking the operation as long as the shift amount will still be
+ // in range.
+ if ((ShAmt < DemandedBits.getActiveBits()) &&
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+
+ // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
+ // are not demanded. This will likely allow the anyext to be folded away.
+ if (Op0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = Op0.getOperand(0);
+ EVT InnerVT = InnerOp.getValueType();
+ unsigned InnerBits = InnerVT.getScalarSizeInBits();
+ if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
+ isTypeDesirableForOp(ISD::SHL, InnerVT)) {
+ EVT ShTy = getShiftAmountTy(InnerVT, DL);
+ if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
+ ShTy = InnerVT;
+ SDValue NarrowShl =
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, dl, ShTy));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
+ }
+ // Repeat the SHL optimization above in cases where an extension
+ // intervenes: (shl (anyext (shr x, c1)), c2) to
+ // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
+ // aren't demanded (as above) and that the shifted upper c1 bits of
+ // x aren't demanded.
+ if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
+ InnerOp.hasOneUse()) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(InnerOp.getOperand(1))) {
+ unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
+ if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
+ DemandedBits.getActiveBits() <=
+ (InnerBits - InnerShAmt + ShAmt) &&
+ DemandedBits.countTrailingZeros() >= ShAmt) {
+ SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
+ Op1.getValueType());
+ SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+ InnerOp.getOperand(0));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
+ }
+ }
+ }
+ }
+
+ Known.Zero <<= ShAmt;
+ Known.One <<= ShAmt;
+ // low bits known zero.
+ Known.Zero.setLowBits(ShAmt);
+ }
+ break;
+ }
+ case ISD::SRL: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
+ // If the shift count is an invalid immediate, don't do anything.
+ if (SA->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ EVT ShiftVT = Op1.getValueType();
+ APInt InDemandedMask = (DemandedBits << ShAmt);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (Op->getFlags().hasExact())
+ InDemandedMask.setLowBits(ShAmt);
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
+ // TODO - support non-uniform vector amounts.
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
+ if (!DemandedBits.intersects(
+ APInt::getHighBitsSet(BitWidth, ShAmt))) {
+ if (SA2->getAPIntValue().ult(BitWidth)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt - C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
+ }
+ }
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+
+ Known.Zero.setHighBits(ShAmt); // High bits known zero.
+ }
+ break;
+ }
+ case ISD::SRA: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedBits.isOneValue())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
+
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
+ // If the shift count is an invalid immediate, don't do anything.
+ if (SA->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ APInt InDemandedMask = (DemandedBits << ShAmt);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (Op->getFlags().hasExact())
+ InDemandedMask.setLowBits(ShAmt);
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ if (DemandedBits.countLeadingZeros() < ShAmt)
+ InDemandedMask.setSignBit();
+
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (Known.Zero[BitWidth - ShAmt - 1] ||
+ DemandedBits.countLeadingZeros() >= ShAmt) {
+ SDNodeFlags Flags;
+ Flags.setExact(Op->getFlags().hasExact());
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
+ }
+
+ int Log2 = DemandedBits.exactLogBase2();
+ if (Log2 >= 0) {
+ // The bit must come from the sign.
+ SDValue NewSA =
+ TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
+ }
+
+ if (Known.One[BitWidth - ShAmt - 1])
+ // New bits are known one.
+ Known.One.setHighBits(ShAmt);
+ }
+ break;
+ }
+ case ISD::FSHL:
+ case ISD::FSHR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
+
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
+ unsigned Amt = SA->getAPIntValue().urem(BitWidth);
+
+ // For fshl, 0-shift returns the 1st arg.
+ // For fshr, 0-shift returns the 2nd arg.
+ if (Amt == 0) {
+ if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ break;
+ }
+
+ // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
+ // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
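+      // e.g. for an i8 fshl with Amt = 3 the result is (Op0 << 3) | (Op1 >> 5),
+      // so Demanded0 = DemandedBits.lshr(3) and Demanded1 = DemandedBits << 5.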
+ APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
+ APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
+ if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+
+ Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
+ Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
+ Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+ Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+ Known.One |= Known2.One;
+ Known.Zero |= Known2.Zero;
+ }
+ break;
+ }
+ case ISD::BITREVERSE: {
+ SDValue Src = Op.getOperand(0);
+ APInt DemandedSrcBits = DemandedBits.reverseBits();
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known.One = Known2.One.reverseBits();
+ Known.Zero = Known2.Zero.reverseBits();
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ SDValue Op0 = Op.getOperand(0);
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned ExVTBits = ExVT.getScalarSizeInBits();
+
+ // If we only care about the highest bit, don't bother shifting right.
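+    // e.g. for (sext_inreg X, i8) on i32, if only bit 31 is demanded we can
+    // use (shl X, 24) to move the i8 sign bit straight into the MSB.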
+ if (DemandedBits.isSignMask()) {
+ unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
+ bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
+ // However if the input is already sign extended we expect the sign
+ // extension to be dropped altogether later and do not simplify.
+ if (!AlreadySignExtended) {
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = VT;
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
+
+ SDValue ShiftAmt =
+ TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
+ }
+ }
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (DemandedBits.getActiveBits() <= ExVTBits)
+ return TLO.CombineTo(Op, Op0);
+
+ APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits.setBit(ExVTBits - 1);
+
+ if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (Known.Zero[ExVTBits - 1])
+ return TLO.CombineTo(
+ Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
+
+ APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
+ if (Known.One[ExVTBits - 1]) { // Input sign bit known set
+ Known.One.setBitsFrom(ExVTBits);
+ Known.Zero &= Mask;
+ } else { // Input sign bit unknown
+ Known.Zero &= Mask;
+ Known.One &= Mask;
+ }
+ break;
+ }
+ case ISD::BUILD_PAIR: {
+ EVT HalfVT = Op.getOperand(0).getValueType();
+ unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
+
+ APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
+ APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
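+    // e.g. for an i64 BUILD_PAIR of two i32 halves, MaskLo and MaskHi are the
+    // demanded bits of the low and high 32-bit halves respectively.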
+
+ KnownBits KnownLo, KnownHi;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
+ return true;
+
+ if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
+ return true;
+
+ Known.Zero = KnownLo.Zero.zext(BitWidth) |
+ KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
+
+ Known.One = KnownLo.One.zext(BitWidth) |
+ KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
+ break;
+ }
+ case ISD::ZERO_EXTEND:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (DemandedBits.getActiveBits() <= InBits) {
+ // If we only need the non-extended bits of the bottom element
+ // then we can just bitcast to the result.
+ if (IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ TLO.DAG.getDataLayout().isLittleEndian())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+ unsigned Opc =
+ IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (DemandedBits.getActiveBits() <= InBits) {
+ // If we only need the non-extended bits of the bottom element
+ // then we can just bitcast to the result.
+ if (IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ TLO.DAG.getDataLayout().isLittleEndian())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+ unsigned Opc =
+ IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InDemandedBits.setBit(InBits - 1);
+
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+
+ // If the sign bit is known one, the top bits match.
+ Known = Known.sext(BitWidth);
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (Known.isNonNegative()) {
+ unsigned Opc =
+ IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::ANY_EXTEND_VECTOR_INREG: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
+
+ // If we only need the bottom element then we can just bitcast.
+ // TODO: Handle ANY_EXTEND?
+ if (IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ TLO.DAG.getDataLayout().isLittleEndian())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+ Known = Known.zext(BitWidth, false /* => any extend */);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ SDValue Src = Op.getOperand(0);
+
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
+ APInt TruncMask = DemandedBits.zext(OperandBitWidth);
+ if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
+ return true;
+ Known = Known.trunc(BitWidth);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Src.getNode()->hasOneUse()) {
+ switch (Src.getOpcode()) {
+ default:
+ break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
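+        // e.g. (i32 truncate (i64 srl X, 8)) can become
+        // (i32 srl (i32 truncate X), 8) when none of the bits shifted in
+        // from the truncated-away upper half are demanded.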
+ if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
+ // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+ // undesirable.
+ break;
+
+ auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
+ if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
+ break;
+
+ SDValue Shift = Src.getOperand(1);
+ uint64_t ShVal = ShAmt->getZExtValue();
+
+ if (TLO.LegalTypes())
+ Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
+
+ APInt HighBits =
+ APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
+ HighBits.lshrInPlace(ShVal);
+ HighBits = HighBits.trunc(BitWidth);
+
+ if (!(HighBits & DemandedBits)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc =
+ TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
+ }
+ break;
+ }
+ }
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::AssertZext: {
+ // AssertZext demands all of the high bits, plus any of the low bits
+ // demanded by its users.
+ EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
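+    // e.g. for (AssertZext X, i8) on i32: InMask = 0xFF, we demand
+    // (0xFFFFFF00 | DemandedBits) from the operand, and the top 24 bits of
+    // the result are known zero.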
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
+ TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
+ Known.Zero |= ~InMask;
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Src = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ unsigned EltBitWidth = Src.getScalarValueSizeInBits();
+
+ // Demand the bits from every vector element without a constant index.
+ APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
+ if (CIdx->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
+
+    // If BitWidth > EltBitWidth the value is any-extended, so we do not know
+    // anything about the extended bits.
+ APInt DemandedSrcBits = DemandedBits;
+ if (BitWidth > EltBitWidth)
+ DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
+
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
+ Depth + 1))
+ return true;
+
+ Known = Known2;
+ if (BitWidth > EltBitWidth)
+ Known = Known.zext(BitWidth, false /* => any extend */);
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+
+ // If this is an FP->Int bitcast and if the sign bit is the only
+ // thing demanded, turn this into a FGETSIGN.
+ if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
+ DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
+ SrcVT.isFloatingPoint()) {
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
+ SrcVT != MVT::f128) {
+ // Cannot eliminate/lower SHL for f128 yet.
+ EVT Ty = OpVTLegal ? VT : MVT::i32;
+ // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+ // place. We expect the SHL to be eliminated by other optimizations.
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
+ unsigned OpVTSizeInBits = Op.getValueSizeInBits();
+ if (!OpVTLegal && OpVTSizeInBits > 32)
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
+ unsigned ShVal = Op.getValueSizeInBits() - 1;
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
+ }
+ }
+
+    // Bitcast from a vector using SimplifyDemandedBits/SimplifyDemandedVectorElts.
+ // Demand the elt/bit if any of the original elts/bits are demanded.
+ // TODO - bigendian once we have test coverage.
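+    // e.g. for a v4i32 -> v2i64 bitcast, Scale = 2: each demanded i64 element
+    // demands the pair of i32 source elements beneath it, but only for the
+    // 32-bit chunks of DemandedBits that are non-zero.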
+ if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
+ TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = BitWidth / NumSrcEltBits;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned Offset = i * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ if (!Sub.isNullValue()) {
+ DemandedSrcBits |= Sub;
+ for (unsigned j = 0; j != NumElts; ++j)
+ if (DemandedElts[j])
+ DemandedSrcElts.setBit((j * Scale) + i);
+ }
+ }
+
+ APInt KnownSrcUndef, KnownSrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+ KnownSrcZero, TLO, Depth + 1))
+ return true;
+
+ KnownBits KnownSrcBits;
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+ KnownSrcBits, TLO, Depth + 1))
+ return true;
+ } else if ((NumSrcEltBits % BitWidth) == 0 &&
+ TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumSrcEltBits / BitWidth;
+ unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % Scale) * BitWidth;
+ DemandedSrcBits.insertBits(DemandedBits, Offset);
+ DemandedSrcElts.setBit(i / Scale);
+ }
+
+ if (SrcVT.isVector()) {
+ APInt KnownSrcUndef, KnownSrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+ KnownSrcZero, TLO, Depth + 1))
+ return true;
+ }
+
+ KnownBits KnownSrcBits;
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+ KnownSrcBits, TLO, Depth + 1))
+ return true;
+ }
+
+ // If this is a bitcast, let computeKnownBits handle it. Only do this on a
+ // recursive call where Known may be useful to the caller.
+ if (Depth > 0) {
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false;
+ }
+ break;
+ }
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SUB: {
+ // Add, Sub, and Mul don't demand any bits in positions beyond that
+ // of the highest bit demanded of them.
+ SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
+ APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
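+    // e.g. if DemandedBits = 0x000000FF on i32, DemandedBitsLZ = 24 and
+    // LoMask = 0x000000FF: bits above the highest demanded bit are ignored.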
+ if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
+ Depth + 1) ||
+ SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
+ Depth + 1) ||
+ // See if the operation should be performed at a smaller bit width.
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
+ if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ return true;
+ }
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ // If we have a constant operand, we may be able to turn it into -1 if we
+ // do not demand the high bits. This can make the constant smaller to
+ // encode, allow more general folding, or match specialized instruction
+    // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
+ // is probably not useful (and could be detrimental).
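+    // e.g. with DemandedBits = 0x00FF on i16, HighMask = 0xFF00, so a
+    // constant C = 0x00FF satisfies (C | HighMask) == all-ones and can be
+    // replaced with -1.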
+ ConstantSDNode *C = isConstOrConstSplat(Op1);
+ APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
+ if (C && !C->isAllOnesValue() && !C->isOne() &&
+ (C->getAPIntValue() | HighMask).isAllOnesValue()) {
+ SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+ default:
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
+ Known, TLO, Depth))
+ return true;
+ break;
+ }
+
+ // Just use computeKnownBits to compute output bits.
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+ // Avoid folding to a constant if any OpaqueConstant is involved.
+ const SDNode *N = Op.getNode();
+ for (SDNodeIterator I = SDNodeIterator::begin(N),
+ E = SDNodeIterator::end(N);
+ I != E; ++I) {
+ SDNode *Op = *I;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+ if (C->isOpaque())
+ return false;
+ }
+ // TODO: Handle float bits as well.
+ if (VT.isInteger())
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+ }
+
+ return false;
+}
+
+bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &KnownUndef,
+ APInt &KnownZero,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+
+ bool Simplified =
+ SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
+ return Simplified;
+}
+
+/// Given a vector binary operation and known undefined elements for each input
+/// operand, compute whether each element of the output is undefined.
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+ const APInt &UndefOp0,
+ const APInt &UndefOp1) {
+ EVT VT = BO.getValueType();
+ assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
+ "Vector binop only");
+
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(UndefOp0.getBitWidth() == NumElts &&
+ UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+ auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+ const APInt &UndefVals) {
+ if (UndefVals[Index])
+ return DAG.getUNDEF(EltVT);
+
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ // Try hard to make sure that the getNode() call is not creating temporary
+ // nodes. Ignore opaque integers because they do not constant fold.
+ SDValue Elt = BV->getOperand(Index);
+ auto *C = dyn_cast<ConstantSDNode>(Elt);
+ if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
+ return Elt;
+ }
+
+ return SDValue();
+ };
+
+ APInt KnownUndef = APInt::getNullValue(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // If both inputs for this element are either constant or undef and match
+ // the element type, compute the constant/undef result for this element of
+ // the vector.
+ // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
+ // not handle FP constants. The code within getNode() should be refactored
+ // to avoid the danger of creating a bogus temporary node here.
+ SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+ SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+ if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+ if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+ KnownUndef.setBit(i);
+ }
+ return KnownUndef;
+}
+
+bool TargetLowering::SimplifyDemandedVectorElts(
+ SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
+ APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
+ bool AssumeSingleUse) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = OriginalDemandedElts;
+ unsigned NumElts = DemandedElts.getBitWidth();
+ assert(VT.isVector() && "Expected vector op");
+ assert(VT.getVectorNumElements() == NumElts &&
+ "Mask size mismatches value type element count!");
+
+ KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+
+ // Undef operand.
+ if (Op.isUndef()) {
+ KnownUndef.setAllBits();
+ return false;
+ }
+
+ // If Op has other users, assume that all elements are needed.
+ if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
+ DemandedElts.setAllBits();
+
+ // Not demanding any elements from Op.
+ if (DemandedElts == 0) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+
+ // Limit search depth.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return false;
+
+ SDLoc DL(Op);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ switch (Op.getOpcode()) {
+ case ISD::SCALAR_TO_VECTOR: {
+ if (!DemandedElts[0]) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+ KnownUndef.setHighBits(NumElts - 1);
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // We only handle vectors here.
+ // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
+ if (!SrcVT.isVector())
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ if (NumSrcElts == NumElts)
+ return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1);
+
+ APInt SrcZero, SrcUndef;
+ APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
+
+    // Bitcast from a 'large element' src vector to a 'small element' vector:
+    // we must demand a source element if any DemandedElt maps to it.
+ if ((NumElts % NumSrcElts) == 0) {
+ unsigned Scale = NumElts / NumSrcElts;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBit(i / Scale);
+
+ if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+
+ // Try calling SimplifyDemandedBits, converting demanded elts to the bits
+ // of the large element.
+ // TODO - bigendian once we have test coverage.
+ if (TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
+ APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Ofs = (i % Scale) * EltSizeInBits;
+ SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
+ }
+
+ KnownBits Known;
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
+ return true;
+ }
+
+      // If the src element is zero/undef then all the output elements it
+      // covers will be as well; only demanded elements are guaranteed to be
+      // correct.
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ if (SrcDemandedElts[i]) {
+ if (SrcZero[i])
+ KnownZero.setBits(i * Scale, (i + 1) * Scale);
+ if (SrcUndef[i])
+ KnownUndef.setBits(i * Scale, (i + 1) * Scale);
+ }
+ }
+ }
+
+    // Bitcast from a 'small element' src vector to a 'large element' vector:
+    // we demand all the smaller source elements covered by each larger
+    // demanded element of this vector.
+ if ((NumSrcElts % NumElts) == 0) {
+ unsigned Scale = NumSrcElts / NumElts;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
+
+ if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+
+ // If all the src elements covering an output element are zero/undef, then
+ // the output element will be as well, assuming it was demanded.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (DemandedElts[i]) {
+ if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
+ KnownZero.setBit(i);
+ if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
+ KnownUndef.setBit(i);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ // Check all elements and simplify any unused elements with UNDEF.
+ if (!DemandedElts.isAllOnesValue()) {
+ // Don't simplify BROADCASTS.
+ if (llvm::any_of(Op->op_values(),
+ [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
+ SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
+ bool Updated = false;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i] && !Ops[i].isUndef()) {
+ Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
+ KnownUndef.setBit(i);
+ Updated = true;
+ }
+ }
+ if (Updated)
+ return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
+ }
+ }
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue SrcOp = Op.getOperand(i);
+ if (SrcOp.isUndef()) {
+ KnownUndef.setBit(i);
+ } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
+ (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
+ KnownZero.setBit(i);
+ }
+ }
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ EVT SubVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVecs = Op.getNumOperands();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ SDValue SubOp = Op.getOperand(i);
+ APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ APInt SubUndef, SubZero;
+ if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef.insertBits(SubUndef, i * NumSubElts);
+ KnownZero.insertBits(SubZero, i * NumSubElts);
+ }
+ break;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ if (!isa<ConstantSDNode>(Op.getOperand(2)))
+ break;
+ SDValue Base = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ const APInt &Idx = Op.getConstantOperandAPInt(2);
+ if (Idx.ugt(NumElts - NumSubElts))
+ break;
+ unsigned SubIdx = Idx.getZExtValue();
+ APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
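+    // e.g. inserting a v2i32 subvector at index 2 into a v8i32: SubElts is
+    // DemandedElts[3:2], and those two bits are cleared in BaseElts below.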
+ APInt SubUndef, SubZero;
+ if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
+ Depth + 1))
+ return true;
+ APInt BaseElts = DemandedElts;
+ BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
+
+ // If none of the base operand elements are demanded, replace it with undef.
+ if (!BaseElts && !Base.isUndef())
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ TLO.DAG.getUNDEF(VT),
+ Op.getOperand(1),
+ Op.getOperand(2)));
+
+ if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef.insertBits(SubUndef, SubIdx);
+ KnownZero.insertBits(SubZero, SubIdx);
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef = SrcUndef.extractBits(NumElts, Idx);
+ KnownZero = SrcZero.extractBits(NumElts, Idx);
+ }
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Scl = Op.getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+
+    // For a legal, constant insertion index: if we don't need this insertion,
+    // strip it; otherwise clear its bit in the demanded elements.
+ if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
+ unsigned Idx = CIdx->getZExtValue();
+ if (!DemandedElts[Idx])
+ return TLO.CombineTo(Op, Vec);
+
+ APInt DemandedVecElts(DemandedElts);
+ DemandedVecElts.clearBit(Idx);
+ if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ KnownUndef.clearBit(Idx);
+ if (Scl.isUndef())
+ KnownUndef.setBit(Idx);
+
+ KnownZero.clearBit(Idx);
+ if (isNullConstant(Scl) || isNullFPConstant(Scl))
+ KnownZero.setBit(Idx);
+ break;
+ }
+
+ APInt VecUndef, VecZero;
+ if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
+ Depth + 1))
+ return true;
+ // Without knowing the insertion index we can't set KnownUndef/KnownZero.
+ break;
+ }
+ case ISD::VSELECT: {
+ // Try to transform the select condition based on the current demanded
+ // elements.
+ // TODO: If a condition element is undef, we can choose from one arm of the
+ // select (and if one arm is undef, then we can propagate that to the
+ // result).
+ // TODO - add support for constant vselect masks (see IR version of this).
+ APInt UnusedUndef, UnusedZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
+ UnusedZero, TLO, Depth + 1))
+ return true;
+
+ // See if we can simplify either vselect operand.
+ APInt DemandedLHS(DemandedElts);
+ APInt DemandedRHS(DemandedElts);
+ APInt UndefLHS, ZeroLHS;
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
+ return true;
+
+ KnownUndef = UndefLHS & UndefRHS;
+ KnownZero = ZeroLHS & ZeroRHS;
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+    // Collect demanded elements from the shuffle operands.
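+    // e.g. for a v4i32 shuffle with mask <0, 5, 2, 7> and all result elements
+    // demanded, DemandedLHS = {0, 2} and DemandedRHS = {1, 3}.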
+ APInt DemandedLHS(NumElts, 0);
+ APInt DemandedRHS(NumElts, 0);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0 || !DemandedElts[i])
+ continue;
+ assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
+ }
+
+ // See if we can simplify either shuffle operand.
+ APInt UndefLHS, ZeroLHS;
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
+ return true;
+
+ // Simplify mask using undef elements from LHS/RHS.
+ bool Updated = false;
+ bool IdentityLHS = true, IdentityRHS = true;
+ SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int &M = NewMask[i];
+ if (M < 0)
+ continue;
+ if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
+ (M >= (int)NumElts && UndefRHS[M - NumElts])) {
+ Updated = true;
+ M = -1;
+ }
+ IdentityLHS &= (M < 0) || (M == (int)i);
+ IdentityRHS &= (M < 0) || ((M - NumElts) == i);
+ }
+
+    // Update legal shuffle masks based on demanded elements, as long as doing
+    // so won't reduce the mask to an identity, which could cause premature
+    // removal of the shuffle.
+ if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
+ SDValue LegalShuffle =
+ buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
+ NewMask, TLO.DAG);
+ if (LegalShuffle)
+ return TLO.CombineTo(Op, LegalShuffle);
+ }
+
+ // Propagate undef/zero elements from LHS/RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0) {
+ KnownUndef.setBit(i);
+ } else if (M < (int)NumElts) {
+ if (UndefLHS[M])
+ KnownUndef.setBit(i);
+ if (ZeroLHS[M])
+ KnownZero.setBit(i);
+ } else {
+ if (UndefRHS[M - NumElts])
+ KnownUndef.setBit(i);
+ if (ZeroRHS[M - NumElts])
+ KnownZero.setBit(i);
+ }
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ APInt SrcUndef, SrcZero;
+ SDValue Src = Op.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownZero = SrcZero.zextOrTrunc(NumElts);
+ KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+
+ if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
+ DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
+ // aext - if we just need the bottom element then we can bitcast.
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+ }
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
+ break;
+ }
+
+ // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+ // MAX, saturated math, etc.
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM: {
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
+ return true;
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
+ return true;
+
+ KnownZero = ZeroLHS & ZeroRHS;
+ KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
+ break;
+ }
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
+ return true;
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
+ return true;
+
+ KnownZero = ZeroLHS;
+ KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
+ break;
+ }
+ case ISD::MUL:
+ case ISD::AND: {
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ // If either side has a zero element, then the result element is zero, even
+ // if the other is an UNDEF.
+ // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
+ // and then handle 'and' nodes with the rest of the binop opcodes.
+ KnownZero |= SrcZero;
+ KnownUndef &= SrcUndef;
+ KnownUndef &= ~KnownZero;
+ break;
+ }
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
+ break;
+ default: {
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth))
+ return true;
+ } else {
+ KnownBits Known;
+ APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
+ if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
+ TLO, Depth, AssumeSingleUse))
+ return true;
+ }
+ break;
+ }
+ }
+ assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
+
+ // Constant fold all undef cases.
+ // TODO: Handle zero cases as well.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
+ return false;
+}
+
+/// Determine which of the bits specified in Mask are known to be either zero
+/// or one and return them in Known.
+void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ Known.resetAll();
+}
+
+void TargetLowering::computeKnownBitsForTargetInstr(
+ GISelKnownBits &Analysis, Register R, KnownBits &Known,
+ const APInt &DemandedElts, const MachineRegisterInfo &MRI,
+ unsigned Depth) const {
+ Known.resetAll();
+}
+
+void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
+
+ if (unsigned Align = DAG.InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ Known.Zero.setLowBits(Log2_32(Align));
+ }
+}
+
+/// This method can be implemented by targets that want to expose additional
+/// information about sign bits to the DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &,
+ const SelectionDAG &,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
+ TargetLoweringOpt &TLO, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyDemandedVectorElts if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
+bool TargetLowering::SimplifyDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyDemandedBits if you don't know whether Op"
+ " is a target node!");
+ computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
+ return false;
+}
+
+SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ assert(
+ (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
+ " is a target node!");
+ return SDValue();
+}
+
+SDValue
+TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
+ SDValue N1, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG) const {
+ bool LegalMask = isShuffleMaskLegal(Mask, VT);
+ if (!LegalMask) {
+ std::swap(N0, N1);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ LegalMask = isShuffleMaskLegal(Mask, VT);
+ }
+
+ if (!LegalMask)
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
+}
+
+const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
+ return nullptr;
+}
+
+bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isKnownNeverNaN if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
+// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that would
+// first need to work with truncating build vectors and vectors with elements
+// of fewer than 8 bits.
+bool TargetLowering::isConstTrueVal(const SDNode *N) const {
+ if (!N)
+ return false;
+
+ APInt CVal;
+ if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
+ CVal = CN->getAPIntValue();
+ } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ auto *CN = BV->getConstantSplatNode();
+ if (!CN)
+ return false;
+
+ // If this is a truncating build vector, truncate the splat value.
+ // Otherwise, we may fail to match the expected values below.
+ unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
+ CVal = CN->getAPIntValue();
+ if (BVEltWidth < CVal.getBitWidth())
+ CVal = CVal.trunc(BVEltWidth);
+ } else {
+ return false;
+ }
+
+ switch (getBooleanContents(N->getValueType(0))) {
+ case UndefinedBooleanContent:
+ return CVal[0];
+ case ZeroOrOneBooleanContent:
+ return CVal.isOneValue();
+ case ZeroOrNegativeOneBooleanContent:
+ return CVal.isAllOnesValue();
+ }
+
+ llvm_unreachable("Invalid boolean contents");
+}
+
+bool TargetLowering::isConstFalseVal(const SDNode *N) const {
+ if (!N)
+ return false;
+
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) {
+ const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+ if (!BV)
+ return false;
+
+    // Only interested in constant splats; we don't care about undef elements
+    // when identifying boolean constants, and getConstantSplatNode returns
+    // null if all ops are undef.
+ CN = BV->getConstantSplatNode();
+ if (!CN)
+ return false;
+ }
+
+ if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
+ return !CN->getAPIntValue()[0];
+
+ return CN->isNullValue();
+}
+
+bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
+ bool SExt) const {
+ if (VT == MVT::i1)
+ return N->isOne();
+
+ TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
+ switch (Cnt) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ // An extended value of 1 is always true, unless its original type is i1,
+ // in which case it will be sign extended to -1.
+ return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
+ case TargetLowering::UndefinedBooleanContent:
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return N->isAllOnesValue() && SExt;
+ }
+ llvm_unreachable("Unexpected enumeration.");
+}
+
+/// This helper function of SimplifySetCC tries to optimize the comparison when
+/// either operand of the SetCC node is a bitwise-and instruction.
+SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ DAGCombinerInfo &DCI) const {
+ // Match these patterns in any of their permutations:
+ // (X & Y) == Y
+ // (X & Y) != Y
+ if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
+ std::swap(N0, N1);
+
+ EVT OpVT = N0.getValueType();
+ if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
+ (Cond != ISD::SETEQ && Cond != ISD::SETNE))
+ return SDValue();
+
+ SDValue X, Y;
+ if (N0.getOperand(0) == N1) {
+ X = N0.getOperand(1);
+ Y = N0.getOperand(0);
+ } else if (N0.getOperand(1) == N1) {
+ X = N0.getOperand(0);
+ Y = N0.getOperand(1);
+ } else {
+ return SDValue();
+ }
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ if (DAG.isKnownToBeAPowerOfTwo(Y)) {
+ // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
+ // Note that where Y is variable and is known to have at most one bit set
+ // (for example, if it is Z & 1) we cannot do this; the expressions are not
+ // equivalent when Y == 0.
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(Cond, N0.getSimpleValueType()))
+ return DAG.getSetCC(DL, VT, N0, Zero, Cond);
+ } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
+ // If the target supports an 'and-not' or 'and-complement' logic operation,
+ // try to use that to make a comparison operation more efficient.
+ // But don't do this transform if the mask is a single bit because there are
+ // more efficient ways to deal with that case (for example, 'bt' on x86 or
+ // 'rlwinm' on PPC).
+
+ // Bail out if the compare operand that we want to turn into a zero is
+ // already a zero (otherwise, infinite loop).
+ auto *YConst = dyn_cast<ConstantSDNode>(Y);
+ if (YConst && YConst->isNullValue())
+ return SDValue();
+
+ // Transform this into: ~X & Y == 0.
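+    // e.g. (X & Y) == Y is equivalent to (~X & Y) == 0, which a target with
+    // an and-not instruction (such as x86 ANDN) can match directly.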
+ SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
+ return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
+ }
+
+ return SDValue();
+}
+
+/// There are multiple IR patterns that could be checking whether a certain
+/// truncation of a signed number would be lossy. The pattern that is best at
+/// the IR level may not lower optimally, so we want to unfold it.
+/// We are looking for the following pattern: (KeptBits is a constant)
+/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+/// KeptBits won't be bitwidth(x); that would be constant-folded to true/false.
+/// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
+/// We will unfold it into the natural trunc+sext pattern:
+/// ((%x << C) a>> C) dstcond %x
+/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
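+/// e.g. for i16 %x with KeptBits = 8, (add %x, 128) u< 256 unfolds to
+/// ((%x << 8) a>> 8) == %x, i.e. a check that %x fits in 8 signed bits.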
+SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
+ EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ // We must be comparing with a constant.
+ ConstantSDNode *C1;
+ if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
+ return SDValue();
+
+ // N0 should be: add %x, (1 << (KeptBits-1))
+ if (N0->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // And we must be 'add'ing a constant.
+ ConstantSDNode *C01;
+ if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
+ return SDValue();
+
+ SDValue X = N0->getOperand(0);
+ EVT XVT = X.getValueType();
+
+ // Validate constants ...
+
+ APInt I1 = C1->getAPIntValue();
+
+ ISD::CondCode NewCond;
+ if (Cond == ISD::CondCode::SETULT) {
+ NewCond = ISD::CondCode::SETEQ;
+ } else if (Cond == ISD::CondCode::SETULE) {
+ NewCond = ISD::CondCode::SETEQ;
+ // But need to 'canonicalize' the constant.
+ I1 += 1;
+ } else if (Cond == ISD::CondCode::SETUGT) {
+ NewCond = ISD::CondCode::SETNE;
+ // But need to 'canonicalize' the constant.
+ I1 += 1;
+ } else if (Cond == ISD::CondCode::SETUGE) {
+ NewCond = ISD::CondCode::SETNE;
+ } else
+ return SDValue();
+
+ APInt I01 = C01->getAPIntValue();
+
+ auto checkConstants = [&I1, &I01]() -> bool {
+    // Both must be powers of two, and the constant from the setcc must be bigger.
+ return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
+ };
+
+ if (checkConstants()) {
+ // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
+ } else {
+ // What if we invert constants? (and the target predicate)
+ I1.negate();
+ I01.negate();
+ NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
+ if (!checkConstants())
+ return SDValue();
+ // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
+ }
+
+  // They are powers of two, so which bit is set?
+ const unsigned KeptBits = I1.logBase2();
+ const unsigned KeptBitsMinusOne = I01.logBase2();
+
+ // Magic!
+ if (KeptBits != (KeptBitsMinusOne + 1))
+ return SDValue();
+ assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
+
+ // We don't want to do this in every single case.
+ SelectionDAG &DAG = DCI.DAG;
+ if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
+ XVT, KeptBits))
+ return SDValue();
+
+ const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
+ assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
+
+ // Unfold into: ((%x << C) a>> C) cond %x
+ // Where 'cond' will be either 'eq' or 'ne'.
+ SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
+ SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
+ SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
+
+ return T2;
+}
+
+// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
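+// e.g. (X & (1 << Y)) != 0 becomes ((X >> Y) & 1) != 0, trading a shifted
+// mask for a shift of X that some targets can fold more cheaply.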
+SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL) const {
+ assert(isConstOrConstSplat(N1C) &&
+ isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+ "Should be a comparison with 0.");
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Valid only for [in]equality comparisons.");
+
+ unsigned NewShiftOpcode;
+ SDValue X, C, Y;
+
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Look for '(C l>>/<< Y)'.
+ auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
+ // The shift should be one-use.
+ if (!V.hasOneUse())
+ return false;
+ unsigned OldShiftOpcode = V.getOpcode();
+ switch (OldShiftOpcode) {
+ case ISD::SHL:
+ NewShiftOpcode = ISD::SRL;
+ break;
+ case ISD::SRL:
+ NewShiftOpcode = ISD::SHL;
+ break;
+ default:
+ return false; // must be a logical shift.
+ }
+ // We should be shifting a constant.
+ // FIXME: best to use isConstantOrConstantVector().
+ C = V.getOperand(0);
+ ConstantSDNode *CC =
+ isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ if (!CC)
+ return false;
+ Y = V.getOperand(1);
+
+ ConstantSDNode *XC =
+ isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
+ };
+
+  // The LHS of the comparison should be a one-use 'and'.
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+ return SDValue();
+
+ X = N0.getOperand(0);
+ SDValue Mask = N0.getOperand(1);
+
+ // 'and' is commutative!
+ if (!Match(Mask)) {
+ std::swap(X, Mask);
+ if (!Match(Mask))
+ return SDValue();
+ }
+
+ EVT VT = X.getValueType();
+
+  // Produce:
+  // ((X 'NewShiftOpcode' Y) & C) Cond 0
+ SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
+ SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
+ return T2;
+}
+
+/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
+/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
+/// handle the commuted versions of these patterns.
+SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ DAGCombinerInfo &DCI) const {
+ unsigned BOpcode = N0.getOpcode();
+ assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
+ "Unexpected binop");
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
+
+ // (X + Y) == X --> Y == 0
+ // (X - Y) == X --> Y == 0
+ // (X ^ Y) == X --> Y == 0
+ SelectionDAG &DAG = DCI.DAG;
+ EVT OpVT = N0.getValueType();
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N0.getOperand(1);
+ if (X == N1)
+ return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
+
+ if (Y != N1)
+ return SDValue();
+
+ // (X + Y) == Y --> X == 0
+ // (X ^ Y) == Y --> X == 0
+ if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
+ return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
+
+ // The shift would not be valid if the operands are boolean (i1).
+ if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
+ return SDValue();
+
+ // (X - Y) == Y --> X == Y << 1
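+  // (X - Y == Y is equivalent to X == 2 * Y, i.e. X == Y << 1.)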
+ EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
+ !DCI.isBeforeLegalize());
+ SDValue One = DAG.getConstant(1, DL, ShiftVT);
+ SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(YShl1.getNode());
+ return DAG.getSetCC(DL, VT, X, YShl1, Cond);
+}
+
+/// Try to simplify a setcc built with the specified operands and cc. If it is
+/// unable to simplify it, return a null SDValue.
+SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI,
+ const SDLoc &dl) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT OpVT = N0.getValueType();
+
+ // Constant fold or commute setcc.
+ if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
+ return Fold;
+
+ // Ensure that the constant occurs on the RHS and fold constant comparisons.
+ // TODO: Handle non-splat vector constants. All undef causes trouble.
+ ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
+ if (isConstOrConstSplat(N0) &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
+ return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+
+ // If we have a subtract with the same 2 non-constant operands as this setcc
+ // -- but in reverse order -- then try to commute the operands of this setcc
+ // to match. A matching pair of setcc (cmp) and sub may be combined into 1
+ // instruction on some targets.
+ if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
+ DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
+ !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
+ return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ const APInt &ShAmt = N0.getConstantOperandAPInt(1);
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(N0.getValueSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
+ }
+ }
+
+ SDValue CTPOP = N0;
+ // Look through truncs that don't change the value of a ctpop.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+ (N0 == CTPOP ||
+ N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
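+      // E.g. for x == 8 (0b1000): x & (x - 1) == 0b1000 & 0b0111 == 0, so
+      // ctpop(x) u< 2 holds.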
+ if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
+ }
+
+ // If ctpop is not supported, expand a power-of-2 comparison based on it.
+ if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
+ // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
+ SDValue Zero = DAG.getConstant(0, dl, CTVT);
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
+ SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
+ SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+ unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
+ return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
+ }
+ }
+
+ // (zext x) == C --> x == (trunc C)
+ // (sext x) == C --> x == (trunc C)
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ DCI.isBeforeLegalize() && N0->hasOneUse()) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreExt;
+ bool Signed = false;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countTrailingOnes();
+ PreExt = N0->getOperand(0);
+ }
+ } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
+ // SExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreExt = N0->getOperand(0);
+ Signed = true;
+ } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD / SEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreExt = N0;
+ } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
+ Signed = true;
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreExt = N0;
+ }
+ }
+
+ // Figure out how many bits we need to preserve this constant.
+ unsigned ReqdBits = Signed ?
+ C1.getBitWidth() - C1.getNumSignBits() + 1 :
+ C1.getActiveBits();
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits > 0 &&
+ MinBits < C1.getBitWidth() &&
+ MinBits >= ReqdBits) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
+ if (MinBits == 1 && C1 == 1)
+ // Invert the condition.
+ return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+
+ // If truncating the setcc operands is not desirable, we can still
+ // simplify the expression in some cases:
+ // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
+ // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
+ // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
+ // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
+ // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
+ // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
+ SDValue TopSetCC = N0->getOperand(0);
+ unsigned N0Opc = N0->getOpcode();
+ bool SExt = (N0Opc == ISD::SIGN_EXTEND);
+ if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
+ TopSetCC.getOpcode() == ISD::SETCC &&
+ (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
+ (isConstFalseVal(N1C) ||
+ isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
+
+ bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
+ (!N1C->isNullValue() && Cond == ISD::SETNE);
+
+ if (!Inverse)
+ return TopSetCC;
+
+ ISD::CondCode InvCond = ISD::getSetCCInverse(
+ cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
+ TopSetCC.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
+ TopSetCC.getOperand(1),
+ InvCond);
+ }
+ }
+ }
+
+ // If the LHS is '(and load, const)', the RHS is 0, the test is for
+ // equality or unsigned, and all 1 bits of the const are in the same
+ // partial word, see if we can shorten the load.
+ if (DCI.isBeforeLegalize() &&
+ !ISD::isSignedIntSetCC(Cond) &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ APInt bestMask;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (Lod->isSimple() && Lod->isUnindexed()) {
+ unsigned origWidth = N0.getValueSizeInBits();
+ unsigned maskWidth = origWidth;
+        // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
+ // 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ const APInt &Mask = N0.getConstantOperandAPInt(1);
+        for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
+          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
+          for (unsigned offset = 0; offset < origWidth / width; offset++) {
+ if (Mask.isSubsetOf(newMask)) {
+ if (DAG.getDataLayout().isLittleEndian())
+ bestOffset = (uint64_t)offset * (width/8);
+ else
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ bestMask = Mask.lshr(offset * (width/8) * 8);
+ bestWidth = width;
+ break;
+ }
+ newMask <<= width;
+ }
+ }
+ }
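+      // E.g. on a little-endian target, ((i32 load) & 0xFF00) == 0 narrows
+      // to an i8 load at offset 1: bestWidth == 8, bestOffset == 1 and
+      // bestMask == 0xFF.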
+ if (bestWidth) {
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
+ if (newVT.isRound() &&
+ shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
+ EVT PtrType = Lod->getOperand(1).getValueType();
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+ DAG.getConstant(bestOffset, dl, PtrType));
+ unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+ SDValue NewLoad = DAG.getLoad(
+ newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask.trunc(bestWidth),
+ dl, newVT)),
+ DAG.getConstant(0LL, dl, newVT), Cond);
+ }
+ }
+ }
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueSizeInBits();
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
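+      // E.g. (zext i8 X to i32) == 0x100 is always false, while
+      // (zext i8 X to i32) u< 0x100 is always true.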
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ:
+ return DAG.getConstant(0, dl, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE:
+ return DAG.getConstant(1, dl, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), dl, VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), dl, VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ EVT newVT = N0.getOperand(0).getValueType();
+ if (DCI.isBeforeLegalizeOps() ||
+ (isOperationLegal(ISD::SETCC, newVT) &&
+ isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
+ EVT NewSetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
+ SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
+
+ SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
+ NewConst, Cond);
+ return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
+ }
+ break;
+ }
+ default:
+        break; // TODO: Be more careful with signed comparisons.
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ EVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the constant doesn't fit into the number of bits for the source of
+ // the sign extension, it is impossible for both sides to be equal.
+ if (C1.getMinSignedBits() > ExtSrcTyBits)
+ return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
+
+ SDValue ZextOp;
+ EVT Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
+ } else {
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, dl, Op0Ty));
+ }
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & APInt::getLowBitsSet(
+ ExtDstTyBits,
+ ExtSrcTyBits),
+ dl, ExtDstTy),
+ Cond);
+ } else if ((N1C->isNullValue() || N1C->isOne()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC &&
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
+ if (TrueWhenTrue)
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ N0.getOperand(0).getValueType().isInteger());
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR) {
+ Val = N0.getOperand(0);
+ } else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ } else if (N1C->isOne() &&
+ (VT == MVT::i1 ||
+ getBooleanContents(N0->getValueType(0)) ==
+ ZeroOrOneBooleanContent)) {
+ SDValue Op0 = N0;
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+
+ if ((Op0.getOpcode() == ISD::XOR) &&
+ Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+ Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+ Cond);
+ }
+ if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
+ // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+ if (Op0.getValueType().bitsGT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, dl, VT));
+ else if (Op0.getValueType().bitsLT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, dl, VT));
+
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, dl, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ if (Op0.getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, dl, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+
+ // Given:
+ // icmp eq/ne (urem %x, %y), 0
+ // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
+ // icmp eq/ne %x, 0
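+    // (%y with two or more bits set is neither 0, 1, nor a power of two,
+    // so it can divide %x -- which is 0 or a power of two -- only when %x
+    // is 0.)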
+ if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
+ KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
+ if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
+ }
+
+ if (SDValue V =
+ optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
+ return V;
+ }
+
+ // These simplifications apply to splat vectors as well.
+ // TODO: Handle more splat vector cases.
+ if (auto *N1C = isConstOrConstSplat(N1)) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ // X >= MIN --> true
+ if (C1 == MinVal)
+ return DAG.getBoolConstant(true, dl, VT, OpVT);
+
+ if (!VT.isVector()) { // TODO: Support this for vectors.
+ // X >= C0 --> X > (C0 - 1)
+ APInt C = C1 - 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, dl, N1.getValueType()),
+ NewCC);
+ }
+ }
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ // X <= MAX --> true
+ if (C1 == MaxVal)
+ return DAG.getBoolConstant(true, dl, VT, OpVT);
+
+ // X <= C0 --> X < (C0 + 1)
+ if (!VT.isVector()) { // TODO: Support this for vectors.
+ APInt C = C1 + 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, dl, N1.getValueType()),
+ NewCC);
+ }
+ }
+ }
+
+ if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
+ if (C1 == MinVal)
+ return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
+
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // Canonicalize setlt X, Max --> setne X, Max
+ if (C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if (C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, dl, N0.getValueType()),
+ ISD::SETEQ);
+ }
+ }
+
+ if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
+ if (C1 == MaxVal)
+ return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
+
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // Canonicalize setgt X, Min --> setne X, Min
+ if (C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ if (C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, dl, N0.getValueType()),
+ ISD::SETEQ);
+ }
+ }
+
+ if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
+ // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+ if (C1.isNullValue())
+ if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ VT, N0, N1, Cond, DCI, dl))
+ return CC;
+ }
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, dl, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+ }
+ }
+
+ // Back to non-vector simplifications.
+ // TODO: Can we do these for vector splats?
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const APInt &C1 = N1C->getAPIntValue();
+ EVT ShValTy = N0.getValueType();
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
+ N0.getOpcode() == ISD::AND) {
+ auto &DL = DAG.getDataLayout();
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize());
+        if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ unsigned ShCt = AndRHS->getAPIntValue().logBase2();
+ if (AndRHS->getAPIntValue().isPowerOf2() &&
+ ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShCt, dl, ShiftTy)));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ unsigned ShCt = C1.logBase2();
+ if (C1.isPowerOf2() &&
+ ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShCt, dl, ShiftTy)));
+ }
+ }
+ }
+ }
+
+ if (C1.getMinSignedBits() <= 64 &&
+ !isLegalICmpImmediate(C1.getSExtValue())) {
+ // (X & -256) == 256 -> (X >> 8) == 1
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ const APInt &AndRHSC = AndRHS->getAPIntValue();
+ if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ unsigned ShiftBits = AndRHSC.countTrailingZeros();
+ auto &DL = DAG.getDataLayout();
+ EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
+ !DCI.isBeforeLegalize());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl,
+ ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
+ }
+ } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
+ Cond == ISD::SETULE || Cond == ISD::SETUGT) {
+ bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
+ // X < 0x100000000 -> (X >> 32) < 1
+ // X >= 0x100000000 -> (X >> 32) >= 1
+ // X <= 0x0ffffffff -> (X >> 32) < 1
+ // X > 0x0ffffffff -> (X >> 32) >= 1
+ unsigned ShiftBits;
+ APInt NewC = C1;
+ ISD::CondCode NewCond = Cond;
+ if (AdjOne) {
+ ShiftBits = C1.countTrailingOnes();
+ NewC = NewC + 1;
+ NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ } else {
+ ShiftBits = C1.countTrailingZeros();
+ }
+ NewC.lshrInPlace(ShiftBits);
+ if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
+ isLegalICmpImmediate(NewC.getSExtValue())) {
+ auto &DL = DAG.getDataLayout();
+ EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
+ !DCI.isBeforeLegalize());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
+ DAG.getConstant(ShiftBits, dl, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
+ }
+ }
+ }
+ }
+
+ if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
+ auto *CFP = cast<ConstantFPSDNode>(N1);
+ assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
+
+    // Otherwise, we know the RHS is not a NaN. Simplify the node to drop
+    // the constant when knowing that the operand is not a NaN is all that
+    // matters. We prefer to
+ // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+ // materialize 0.0.
+ if (Cond == ISD::SETO || Cond == ISD::SETUO)
+ return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+ // setcc (fneg x), C -> setcc swap(pred) x, -C
+ if (N0.getOpcode() == ISD::FNEG) {
+ ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
+ SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
+ }
+ }
+
+ // If the condition is not legal, see if we can find an equivalent one
+ // which is legal.
+ if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
+ // If the comparison was an awkward floating-point == or != and one of
+ // the comparison operands is infinity or negative infinity, convert the
+ // condition to a less-awkward <= or >=.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+ if (Cond == ISD::SETUEQ &&
+              isCondCodeLegal(ISD::SETULE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+ if (Cond == ISD::SETONE &&
+              isCondCodeLegal(ISD::SETOGT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+ } else {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+ if (Cond == ISD::SETUEQ &&
+              isCondCodeLegal(ISD::SETUGE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+ if (Cond == ISD::SETONE &&
+              isCondCodeLegal(ISD::SETOLT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ }
+ }
+ }
+ }
+
+ if (N0 == N1) {
+ // The sext(setcc()) => setcc() optimization relies on the appropriate
+ // constant being emitted.
+ assert(!N0.getValueType().isInteger() &&
+ "Integer types should be handled by FoldSetCC");
+
+ bool EqTrue = ISD::isTrueWhenEqual(Cond);
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
+ if (UOF == unsigned(EqTrue))
+ return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCond, N0.getSimpleValueType())))
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly.
+ bool LegalRHSImm = false;
+
+ if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue()-
+ LHSR->getAPIntValue(),
+ dl, N0.getValueType()), Cond);
+ }
+
+ // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+ if (N0.getOpcode() == ISD::XOR)
+ // If we know that all of the inverted bits are zero, don't bother
+ // performing the inversion.
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^
+ RHSC->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() -
+ RHSC->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
+ }
+ }
+
+ // Could RHSC fold directly into a compare?
+ if (RHSC->getValueType(0).getSizeInBits() <= 64)
+ LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
+ }
+
+ // (X+Y) == X --> Y == 0 and similar folds.
+ // Don't do this if X is an immediate that can fold into a cmp
+ // instruction and X+Y has other uses. It could be an induction variable
+ // chain, and the transform would increase register pressure.
+ if (!LegalRHSImm || N0.hasOneUse())
+ if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
+ return V;
+ }
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR)
+ if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
+ return V;
+
+ if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
+ return V;
+ }
+
+ // Fold remainder of division by a constant.
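+  // E.g. for i32, (X urem 5) == 0 can become (X * 0xCCCCCCCD) u<= 0x33333333,
+  // since multiplying by the inverse of 5 maps the multiples of 5 exactly
+  // onto [0, 0x33333333].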
+ if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
+ N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+
+ // When division is cheap or optimizing for minimum size,
+ // fall through to DIVREM creation by skipping this fold.
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
+ if (N0.getOpcode() == ISD::UREM) {
+ if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ } else if (N0.getOpcode() == ISD::SREM) {
+ if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ }
+ }
+ }
+
+ // Fold away ALL boolean setcc's.
+ if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
+ SDValue Temp;
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, OpVT);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Temp = DAG.getNOT(dl, N0, OpVT);
+ N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Temp = DAG.getNOT(dl, N1, OpVT);
+ N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Temp = DAG.getNOT(dl, N0, OpVT);
+ N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Temp = DAG.getNOT(dl, N1, OpVT);
+ N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
+ break;
+ }
+ if (VT.getScalarType() != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.getNode());
+ // FIXME: If running after legalize, we probably can't do this.
+ ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
+ N0 = DAG.getNode(ExtendCode, dl, VT, N0);
+ }
+ return N0;
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// Returns true (and the GlobalValue and the offset) if the node is a
+/// GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
+ int64_t &Offset) const {
+
+ SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
+
+ if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
+ GA = GASD->getGlobal();
+ Offset += GASD->getOffset();
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue N1 = N->getOperand(0);
+ SDValue N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+SDValue TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(StringRef Constraint) const {
+ unsigned S = Constraint.size();
+
+ if (S == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r':
+ return C_RegisterClass;
+ case 'm': // memory
+    case 'o': // offsettable
+    case 'V': // not offsettable
+ return C_Memory;
+ case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
+ return C_Immediate;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 's': // Relocatable Constant
+ case 'p': // Address.
+ case 'X': // Allow ANY value.
+    case 'I': // Target-specific constraint letters.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case '<':
+ case '>':
+ return C_Other;
+ }
+ }
+
+ if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
+ if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
+ return C_Memory;
+ return C_Register;
+ }
+ return C_Unknown;
+}
+
+/// Try to replace an X constraint, which matches anything, with another that
+/// has more specific requirements based on the type of the corresponding
+/// operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return nullptr;
+}
+
+SDValue TargetLowering::LowerAsmOutputForConstraint(
+ SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
+ SelectionDAG &DAG) const {
+ return SDValue();
+}
+
+/// Lower the specified operand into the Ops vector.
+/// If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock ||
+ Op.getOpcode() == ISD::TargetBlockAddress) {
+ Ops.push_back(Op);
+ return;
+ }
+ LLVM_FALLTHROUGH;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+
+ GlobalAddressSDNode *GA;
+ ConstantSDNode *C;
+ BlockAddressSDNode *BA;
+ uint64_t Offset = 0;
+
+ // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
+    // etc., since getelementptr is variadic. We can't use
+ // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
+ // while in this case the GA may be furthest from the root node which is
+ // likely an ISD::ADD.
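+    // E.g. (add (add GA, 4), 8) is matched as GA with an accumulated Offset
+    // of 12.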
+    while (true) {
+ if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
+ GA->getValueType(0),
+ Offset + GA->getOffset()));
+ return;
+ } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
+ ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
+ BooleanContent BCont = getBooleanContents(MVT::i64);
+ ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
+ : ISD::SIGN_EXTEND;
+ int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
+ : C->getSExtValue();
+ Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
+ SDLoc(C), MVT::i64));
+ return;
+ } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
+ ConstraintLetter != 'n') {
+ Ops.push_back(DAG.getTargetBlockAddress(
+ BA->getBlockAddress(), BA->getValueType(0),
+ Offset + BA->getOffset(), BA->getTargetFlags()));
+ return;
+ } else {
+ const unsigned OpCode = Op.getOpcode();
+ if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
+ if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
+ Op = Op.getOperand(1);
+ // Subtraction is not commutative.
+ else if (OpCode == ISD::ADD &&
+ (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
+ Op = Op.getOperand(0);
+ else
+ return;
+ Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
+ continue;
+ }
+ }
+ return;
+ }
+ break;
+ }
+ }
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
+ StringRef Constraint,
+ MVT VT) const {
+ if (Constraint.empty() || Constraint[0] != '{')
+ return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
+ assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
+
+ std::pair<unsigned, const TargetRegisterClass *> R =
+ std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
+
+ // Figure out which register class contains this reg.
+ for (const TargetRegisterClass *RC : RI->regclasses()) {
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ if (!isLegalRC(*RI, *RC))
+ continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (RegName.equals_lower(RI->getRegAsmName(*I))) {
+ std::pair<unsigned, const TargetRegisterClass *> S =
+ std::make_pair(*I, RC);
+
+ // If this register class has the requested value type, return it,
+ // otherwise keep searching and return the first class found
+ // if no other is found which explicitly has the requested type.
+ if (RI->isTypeLegalForClass(*RC, VT))
+ return S;
+ if (!R.second)
+ R = S;
+ }
+ }
+ }
+
+ return R;
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// Return true if this is an input operand that is a matching constraint like
+/// "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
+}
+
+/// If this is an input matching constraint, this method returns the output
+/// operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+/// Split up the constraint string from the inline assembly value into the
+/// specific constraints and their prefixes, and also tie in the associated
+/// operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector
+TargetLowering::ParseConstraints(const DataLayout &DL,
+ const TargetRegisterInfo *TRI,
+ ImmutableCallSite CS) const {
+ /// Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+
+ for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+ ConstraintOperands.emplace_back(std::move(CI));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT =
+ getSimpleValueType(DL, STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+      // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = DL.getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpInfo.ConstraintVT =
+ MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ break;
+ }
+ } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
+ unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
+ OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
+ } else {
+ OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
+ }
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (!ConstraintOperands.empty()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch, e.g. one
+ // is an integer, the other is floating point, or their sizes are
+          // different, mark this alternative as unusable.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
+ if (cInfo.Type == InlineAsm::isClobber)
+ continue;
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+ Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+ }
+ }
+
+ return ConstraintOperands;
+}
+
+/// Return an integer indicating how general CT is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ case TargetLowering::C_Immediate:
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+ llvm_unreachable("Invalid constraint type");
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (!CallOperandVal)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+  case 's': // non-explicit integral immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float if host format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
+/// If there are multiple different constraints that we could pick for this
+/// operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, use of the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory. Because of
+/// this the heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // If this is an 'other' or 'immediate' constraint, see if the operand is
+ // valid for it. For example, on X86 we might have an 'rI' constraint. If
+ // the operand is an integer in the range [0..31] we want to use I (saving a
+ // load of a register), otherwise we must use 'r'.
+ if ((CType == TargetLowering::C_Other ||
+ CType == TargetLowering::C_Immediate) && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
+ // This constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
+
+/// Determines the constraint code and constraint type to use for the specific
+/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *v = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
+ OpInfo.CallOperandVal = v;
+ return;
+ }
+
+ if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
+ return;
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+/// Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
+static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
+ const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ bool UseSRA = false;
+ SmallVector<SDValue, 16> Shifts, Factors;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+ APInt Divisor = C->getAPIntValue();
+ unsigned Shift = Divisor.countTrailingZeros();
+ if (Shift) {
+ Divisor.ashrInPlace(Shift);
+ UseSRA = true;
+ }
+ // Calculate the multiplicative inverse, using Newton's method.
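+    // For odd Divisor, Factor = Divisor is already an inverse mod 8 (odd
+    // squares are 1 mod 8), and each iteration doubles the number of
+    // correct low bits. E.g. for i32 and Divisor == 3 this converges to
+    // 0xAAAAAAAB, since 3 * 0xAAAAAAAB == 1 (mod 2^32).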
+ APInt t;
+ APInt Factor = Divisor;
+ while ((t = Divisor * Factor) != 1)
+ Factor *= APInt(Divisor.getBitWidth(), 2) - t;
+ Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
+ Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+ return true;
+ };
+
+ // Collect all magic values from the build vector.
+ if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
+ return SDValue();
+
+ SDValue Shift, Factor;
+ if (VT.isVector()) {
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ } else {
+ Shift = Shifts[0];
+ Factor = Factors[0];
+ }
+
+ SDValue Res = Op0;
+
+ // Shift the value upfront if it is even, so the LSB is one.
+ if (UseSRA) {
+ // TODO: For UDIV use SRL instead of SRA.
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
+ Created.push_back(Res.getNode());
+ }
+
+ return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
+}
+
+SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SDIV as SDIV
+ return SDValue();
+}
+
+/// Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
+ SmallVectorImpl<SDNode *> &Created) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // If the sdiv has an 'exact' bit we can use a simpler lowering.
+ if (N->getFlags().hasExact())
+ return BuildExactSDIV(*this, N, dl, DAG, Created);
+
+ SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+
+ const APInt &Divisor = C->getAPIntValue();
+ APInt::ms magics = Divisor.magic();
+ int NumeratorFactor = 0;
+ int ShiftMask = -1;
+
+ if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
+ // If d is +1/-1, we just multiply the numerator by +1/-1.
+ NumeratorFactor = Divisor.getSExtValue();
+ magics.m = 0;
+ magics.s = 0;
+ ShiftMask = 0;
+ } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
+ // If d > 0 and m < 0, add the numerator.
+ NumeratorFactor = 1;
+ } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
+ // If d < 0 and m > 0, subtract the numerator.
+ NumeratorFactor = -1;
+ }
+
+ MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
+ Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
+ Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
+ ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Collect the shifts / magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
+ return SDValue();
+
+ SDValue MagicFactor, Factor, Shift, ShiftMask;
+ if (VT.isVector()) {
+ MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
+ } else {
+ MagicFactor = MagicFactors[0];
+ Factor = Factors[0];
+ Shift = Shifts[0];
+ ShiftMask = ShiftMasks[0];
+ }
+
+ // Multiply the numerator (operand 0) by the magic value.
+ // FIXME: We should support doing a MUL in a wider type.
+ SDValue Q;
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
+ : isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
+ : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
+ Q = SDValue(LoHi.getNode(), 1);
+ } else
+ return SDValue(); // No mulhs or equivalent.
+ Created.push_back(Q.getNode());
+
+ // (Optionally) Add/subtract the numerator using Factor.
+ Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
+ Created.push_back(Factor.getNode());
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
+ Created.push_back(Q.getNode());
+
+ // Shift right algebraic by shift value.
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
+ Created.push_back(Q.getNode());
+
+ // Extract the sign bit, mask it and add it to the quotient.
+ SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
+ SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
+ Created.push_back(T.getNode());
+ T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
+ Created.push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
+ SmallVectorImpl<SDNode *> &Created) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ bool UseNPQ = false;
+ SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+ auto BuildUDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ APInt Divisor = C->getAPIntValue();
+ APInt::mu magics = Divisor.magicu();
+ unsigned PreShift = 0, PostShift = 0;
+
+ // If the divisor is even, we can avoid using the expensive fixup by
+ // shifting the divided value upfront.
+ if (magics.a != 0 && !Divisor[0]) {
+ PreShift = Divisor.countTrailingZeros();
+ // Get magic number for the shifted divisor.
+ magics = Divisor.lshr(PreShift).magicu(PreShift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
+
+ APInt Magic = magics.m;
+
+    bool SelNPQ;
+ if (magics.a == 0 || Divisor.isOneValue()) {
+ assert(magics.s < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ PostShift = magics.s;
+ SelNPQ = false;
+ } else {
+ PostShift = magics.s - 1;
+ SelNPQ = true;
+ }
+
+ PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
+ MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
+ NPQFactors.push_back(
+ DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getNullValue(EltBits),
+ dl, SVT));
+ PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
+ UseNPQ |= SelNPQ;
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Collect the shifts/magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ return SDValue();
+
+ SDValue PreShift, PostShift, MagicFactor, NPQFactor;
+ if (VT.isVector()) {
+ PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
+ MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+ NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
+ PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
+ } else {
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
+ }
+
+ SDValue Q = N0;
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
+ Created.push_back(Q.getNode());
+
+ // FIXME: We should support doing a MUL in a wider type.
+ auto GetMULHU = [&](SDValue X, SDValue Y) {
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
+ : isOperationLegalOrCustom(ISD::MULHU, VT))
+ return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
+ if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
+ : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
+ return SDValue(LoHi.getNode(), 1);
+ }
+ return SDValue(); // No mulhu or equivalent
+ };
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = GetMULHU(Q, MagicFactor);
+ if (!Q)
+ return SDValue();
+
+ Created.push_back(Q.getNode());
+
+ if (UseNPQ) {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
+ Created.push_back(NPQ.getNode());
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
+ if (VT.isVector())
+ NPQ = GetMULHU(NPQ, NPQFactor);
+ else
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
+
+ Created.push_back(NPQ.getNode());
+
+ Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ Created.push_back(Q.getNode());
+ }
+
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
+ Created.push_back(Q.getNode());
+
+ SDValue One = DAG.getConstant(1, dl, VT);
+  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
+ return DAG.getSelect(dl, VT, IsOne, N0, Q);
+}
+
+/// If all values in Values that *don't* match the predicate are same 'splat'
+/// value, then replace all values with that splat value.
+/// Else, if AlternativeReplacement was provided, then replace all values that
+/// do match predicate with AlternativeReplacement value.
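+///
+/// For example, with Predicate = isNullConstant, {5, 0, 5, 0} becomes
+/// {5, 5, 5, 5}, while {5, 3, 0, 0} is left untouched because 5 and 3 are
+/// two distinct non-matching values, so no single splat candidate exists.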
+static void
+turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
+ std::function<bool(SDValue)> Predicate,
+ SDValue AlternativeReplacement = SDValue()) {
+ SDValue Replacement;
+ // Is there a value for which the Predicate does *NOT* match? What is it?
+ auto SplatValue = llvm::find_if_not(Values, Predicate);
+ if (SplatValue != Values.end()) {
+ // Does Values consist only of SplatValue's and values matching Predicate?
+ if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
+ return Value == *SplatValue || Predicate(Value);
+ })) // Then we shall replace values matching predicate with SplatValue.
+ Replacement = *SplatValue;
+ }
+ if (!Replacement) {
+ // Oops, we did not find the "baseline" splat value.
+ if (!AlternativeReplacement)
+ return; // Nothing to do.
+ // Let's replace with provided value then.
+ Replacement = AlternativeReplacement;
+ }
+ std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
+}
+
+/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return a DAG expression that will generate the same comparison result
+/// using only multiplications, additions and shifts/rotations.
+/// Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ SmallVector<SDNode *, 2> Built;
+ if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
+ DCI, DL, Built)) {
+ for (SDNode *N : Built)
+ DCI.AddToWorklist(N);
+ return Folded;
+ }
+
+ return SDValue();
+}
+
+SDValue
+TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
+ // - D must be constant, with D = D0 * 2^K where D0 is odd
+ // - P is the multiplicative inverse of D0 modulo 2^W
+ // - Q = floor(((2^W) - 1) / D)
+ // where W is the width of the common type of N and D.
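+  //
+  // For illustration (a standard example, not derived here): for W = 32 and
+  // D = 3 this gives P = 0xAAAAAAAB, K = 0 and Q = 0x55555555, so
+  //   (urem N, 3) == 0  -->  (mul N, 0xAAAAAAAB) u<= 0x55555555
+  // with no rotate, since the divisor is odd.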
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Only applicable for (in)equality comparisons.");
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ EVT VT = REMNode.getValueType();
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ // If MUL is unavailable, we cannot proceed in any case.
+ if (!isOperationLegalOrCustom(ISD::MUL, VT))
+ return SDValue();
+
+ // TODO: Could support comparing with non-zero too.
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
+ if (!CompTarget || !CompTarget->isNullValue())
+ return SDValue();
+
+ bool HadOneDivisor = false;
+ bool AllDivisorsAreOnes = true;
+ bool HadEvenDivisor = false;
+ bool AllDivisorsArePowerOfTwo = true;
+ SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
+
+ auto BuildUREMPattern = [&](ConstantSDNode *C) {
+ // Division by 0 is UB. Leave it to be constant-folded elsewhere.
+ if (C->isNullValue())
+ return false;
+
+ const APInt &D = C->getAPIntValue();
+ // If all divisors are ones, we will prefer to avoid the fold.
+ HadOneDivisor |= D.isOneValue();
+ AllDivisorsAreOnes &= D.isOneValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ // D is even if it has trailing zeros.
+ HadEvenDivisor |= (K != 0);
+ // D is a power-of-two if D0 is one.
+ // If all divisors are power-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOneValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // Q = floor((2^W - 1) / D)
+ APInt Q = APInt::getAllOnesValue(W).udiv(D);
+
+ assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the divisor is 1 the result can be constant-folded.
+ if (D.isOneValue()) {
+      // Set P and K to bogus values so we can try to splat them.
+ P = 0;
+ K = -1;
+ assert(Q.isAllOnesValue() &&
+ "Expecting all-ones comparison for one divisor");
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
+
+ // Collect the values from each element.
+ if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
+ return SDValue();
+
+  // If this is a urem by one, avoid the fold since it can be constant-folded.
+ if (AllDivisorsAreOnes)
+ return SDValue();
+
+  // If this is a urem by a power-of-two, avoid the fold since it is best
+  // implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
+
+ SDValue PVal, KVal, QVal;
+ if (VT.isVector()) {
+ if (HadOneDivisor) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+      // that are currently '0'. If we can't, just keep the '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn KAmts into a splat, since we don't care about the values
+      // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
+
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else {
+ PVal = PAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
+
+ // (mul N, P)
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
+
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
+ // We need ROTR to do this.
+ if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ // UREM: (rotr (mul N, P), K)
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Created.push_back(Op0.getNode());
+ }
+
+ // UREM: (setule/setugt (rotr (mul N, P), K), Q)
+ return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+}
+
+/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return a DAG expression that will generate the same comparison result
+/// using only multiplications, additions and shifts/rotations.
+/// Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ SmallVector<SDNode *, 7> Built;
+ if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
+ DCI, DL, Built)) {
+ assert(Built.size() <= 7 && "Max size prediction failed.");
+ for (SDNode *N : Built)
+ DCI.AddToWorklist(N);
+ return Folded;
+ }
+
+ return SDValue();
+}
+
+SDValue
+TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // Fold:
+ // (seteq/ne (srem N, D), 0)
+ // To:
+ // (setule/ugt (rotr (add (mul N, P), A), K), Q)
+ //
+ // - D must be constant, with D = D0 * 2^K where D0 is odd
+ // - P is the multiplicative inverse of D0 modulo 2^W
+  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
+ // - Q = floor((2 * A) / (2^K))
+ // where W is the width of the common type of N and D.
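+  //
+  // For illustration (a standard example, not derived here): for W = 32 and
+  // D = 3 this gives P = 0xAAAAAAAB, K = 0, A = 0x2AAAAAAA and Q = 0x55555554:
+  //   (srem N, 3) == 0
+  //     --> (add (mul N, 0xAAAAAAAB), 0x2AAAAAAA) u<= 0x55555554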
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Only applicable for (in)equality comparisons.");
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ EVT VT = REMNode.getValueType();
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ // If MUL is unavailable, we cannot proceed in any case.
+ if (!isOperationLegalOrCustom(ISD::MUL, VT))
+ return SDValue();
+
+ // TODO: Could support comparing with non-zero too.
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
+ if (!CompTarget || !CompTarget->isNullValue())
+ return SDValue();
+
+ bool HadIntMinDivisor = false;
+ bool HadOneDivisor = false;
+ bool AllDivisorsAreOnes = true;
+ bool HadEvenDivisor = false;
+ bool NeedToApplyOffset = false;
+ bool AllDivisorsArePowerOfTwo = true;
+ SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
+
+ auto BuildSREMPattern = [&](ConstantSDNode *C) {
+ // Division by 0 is UB. Leave it to be constant-folded elsewhere.
+ if (C->isNullValue())
+ return false;
+
+ // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
+
+ // WARNING: this fold is only valid for positive divisors!
+ APInt D = C->getAPIntValue();
+ if (D.isNegative())
+ D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
+
+ HadIntMinDivisor |= D.isMinSignedValue();
+
+ // If all divisors are ones, we will prefer to avoid the fold.
+ HadOneDivisor |= D.isOneValue();
+ AllDivisorsAreOnes &= D.isOneValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ if (!D.isMinSignedValue()) {
+ // D is even if it has trailing zeros; unless it's INT_MIN, in which case
+      // we don't care about this lane in this fold; we'll special-handle it.
+ HadEvenDivisor |= (K != 0);
+ }
+
+ // D is a power-of-two if D0 is one. This includes INT_MIN.
+ // If all divisors are power-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOneValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // A = floor((2^(W - 1) - 1) / D0) & -2^K
+ APInt A = APInt::getSignedMaxValue(W).udiv(D0);
+ A.clearLowBits(K);
+
+ if (!D.isMinSignedValue()) {
+      // If the divisor is INT_MIN, we don't care about this lane in this
+      // fold; we'll special-handle it.
+ NeedToApplyOffset |= A != 0;
+ }
+
+ // Q = floor((2 * A) / (2^K))
+ APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
+
+ assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
+ "We are expecting that A is always less than all-ones for SVT");
+ assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the divisor is 1 the result can be constant-folded. Likewise, we
+ // don't care about INT_MIN lanes, those can be set to undef if appropriate.
+ if (D.isOneValue()) {
+      // Set P, A and K to bogus values so we can try to splat them.
+ P = 0;
+ A = -1;
+ K = -1;
+
+ // x ?% 1 == 0 <--> true <--> x u<= -1
+ Q = -1;
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ AAmts.push_back(DAG.getConstant(A, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
+
+ // Collect the values from each element.
+ if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
+ return SDValue();
+
+  // If this is a srem by one, avoid the fold since it can be constant-folded.
+ if (AllDivisorsAreOnes)
+ return SDValue();
+
+  // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
+  // since it is best implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
+
+ SDValue PVal, AVal, KVal, QVal;
+ if (VT.isVector()) {
+ if (HadOneDivisor) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+      // that are currently '0'. If we can't, just keep the '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn AAmts into a splat, since we don't care about the
+      // values that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, SVT));
+ // Try to turn KAmts into a splat, since we don't care about the values
+      // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
+
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ AVal = DAG.getBuildVector(VT, DL, AAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else {
+ PVal = PAmts[0];
+ AVal = AAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
+
+ // (mul N, P)
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
+
+ if (NeedToApplyOffset) {
+ // We need ADD to do this.
+ if (!isOperationLegalOrCustom(ISD::ADD, VT))
+ return SDValue();
+
+ // (add (mul N, P), A)
+ Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
+ Created.push_back(Op0.getNode());
+ }
+
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
+ // We need ROTR to do this.
+ if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ // SREM: (rotr (add (mul N, P), A), K)
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Created.push_back(Op0.getNode());
+ }
+
+ // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
+ SDValue Fold =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+
+ // If we didn't have lanes with INT_MIN divisor, then we're done.
+ if (!HadIntMinDivisor)
+ return Fold;
+
+ // That fold is only valid for positive divisors. Which effectively means,
+ // it is invalid for INT_MIN divisors. So if we have such a lane,
+ // we must fix-up results for said lanes.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+
+ if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
+ !isOperationLegalOrCustom(ISD::AND, VT) ||
+ !isOperationLegalOrCustom(Cond, VT) ||
+ !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+
+ Created.push_back(Fold.getNode());
+
+ SDValue IntMin = DAG.getConstant(
+ APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue IntMax = DAG.getConstant(
+ APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue Zero =
+ DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
+
+ // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
+ SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
+ Created.push_back(DivisorIsIntMin.getNode());
+
+ // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
+ SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
+ Created.push_back(Masked.getNode());
+ SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
+ Created.push_back(MaskedIsZero.getNode());
+
+ // To produce final result we need to blend 2 vectors: 'SetCC' and
+ // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
+ // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
+ // constant-folded, select can get lowered to a shuffle with constant mask.
+ SDValue Blended =
+ DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);
+
+ return Blended;
+}
+
+bool TargetLowering::
+verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
+ if (!isa<ConstantSDNode>(Op.getOperand(0))) {
+ DAG.getContext()->emitError("argument to '__builtin_return_address' must "
+ "be a constant integer");
+ return true;
+ }
+
+ return false;
+}
+
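+// Helper semantics (per the declaration in TargetLowering.h): the return
+// value is 0 if Op cannot be negated cheaply, 1 if its negated form costs
+// about the same as Op itself, and 2 if the negation is strictly cheaper
+// (e.g. it merely strips an existing FNEG).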
+char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, bool ForCodeSize,
+ unsigned Depth) const {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG)
+ return 2;
+
+ // Don't allow anything with multiple uses unless we know it is free.
+ EVT VT = Op.getValueType();
+ const SDNodeFlags Flags = Op->getFlags();
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
+ isFPExtFree(VT, Op.getOperand(0).getValueType())))
+ return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return 0;
+
+ switch (Op.getOpcode()) {
+ case ISD::ConstantFP: {
+ if (!LegalOperations)
+ return 1;
+
+ // Don't invert constant FP values after legalization unless the target says
+ // the negated constant is legal.
+ return isOperationLegal(ISD::ConstantFP, VT) ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+ ForCodeSize);
+ }
+ case ISD::BUILD_VECTOR: {
+ // Only permit BUILD_VECTOR of constants.
+ if (llvm::any_of(Op->op_values(), [&](SDValue N) {
+ return !N.isUndef() && !isa<ConstantFPSDNode>(N);
+ }))
+ return 0;
+ if (!LegalOperations)
+ return 1;
+ if (isOperationLegal(ISD::ConstantFP, VT) &&
+ isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return 1;
+ return llvm::all_of(Op->op_values(), [&](SDValue N) {
+ return N.isUndef() ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+ ForCodeSize);
+ });
+ }
+ case ISD::FADD:
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
+ return 0;
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1))
+ return V;
+
+ // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
+ if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
+ if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
+ return 0;
+
+ return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+
+ case ISD::FMA:
+ case ISD::FMAD: {
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+ char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ if (!V2)
+ return 0;
+
+ // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
+ char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V01 = std::max(V0, V1);
+ return V01 ? std::max(V01, V2) : 0;
+ }
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ }
+
+ return 0;
+}
+
+SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations,
+ bool ForCodeSize,
+ unsigned Depth) const {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG)
+ return Op.getOperand(0);
+
+ assert(Depth <= SelectionDAG::MaxRecursionDepth &&
+ "getNegatedExpression doesn't match isNegatibleForFree");
+ const SDNodeFlags Flags = Op->getFlags();
+
+ switch (Op.getOpcode()) {
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
+ }
+ case ISD::BUILD_VECTOR: {
+ SmallVector<SDValue, 4> Ops;
+ for (SDValue C : Op->op_values()) {
+ if (C.isUndef()) {
+ Ops.push_back(C);
+ continue;
+ }
+ APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+ V.changeSign();
+ Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+ }
+ return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
+ }
+ case ISD::FADD:
+ assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros()) &&
+ "Expected NSZ fp-flag");
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
+ Depth + 1))
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1), Flags);
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(0), Flags);
+ case ISD::FSUB:
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP =
+ isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
+ if (N0CFP->isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0), Flags);
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
+ Depth + 1))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1), Flags);
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(
+ Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
+ getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1),
+ Flags);
+
+ case ISD::FMA:
+ case ISD::FMAD: {
+ assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros()) &&
+ "Expected NSZ fp-flag");
+
+ SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+
+ char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ if (V0 >= V1) {
+ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ SDValue Neg0 = getNegatedExpression(
+ Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
+ Op.getOperand(1), Neg2, Flags);
+ }
+
+ // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+ SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), Neg1, Neg2, Flags);
+ }
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1));
+ }
+
+ llvm_unreachable("Unknown code");
+}
+
+//===----------------------------------------------------------------------===//
+// Legalization Utilities
+//===----------------------------------------------------------------------===//
+
+bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
+ SDValue LHS, SDValue RHS,
+ SmallVectorImpl<SDValue> &Result,
+ EVT HiLoVT, SelectionDAG &DAG,
+ MulExpansionKind Kind, SDValue LL,
+ SDValue LH, SDValue RL, SDValue RH) const {
+ assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
+ Opcode == ISD::SMUL_LOHI);
+
+ bool HasMULHS = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
+ bool HasMULHU = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
+ bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
+ bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
+
+ if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
+ return false;
+
+ unsigned OuterBitSize = VT.getScalarSizeInBits();
+ unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
+ unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
+
+ // LL, LH, RL, and RH must be either all NULL or all set to a value.
+ assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
+ (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
+
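+  // What follows is schoolbook multiplication on half-width limbs: with
+  // b = 2^InnerBitSize, LHS = LH * b + LL and RHS = RH * b + RL, so
+  //   LHS * RHS = LL*RL + (LL*RH + LH*RL) * b + LH*RH * b^2
+  // and the partial products are recombined below with carry propagation.
+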
+ SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
+ auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
+ bool Signed) -> bool {
+ if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
+ Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
+ Hi = SDValue(Lo.getNode(), 1);
+ return true;
+ }
+ if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
+ Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
+ return true;
+ }
+ return false;
+ };
+
+ SDValue Lo, Hi;
+
+ if (!LL.getNode() && !RL.getNode() &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
+ RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
+ }
+
+ if (!LL.getNode())
+ return false;
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(LHS, HighMask) &&
+ DAG.MaskedValueIsZero(RHS, HighMask)) {
+ // The inputs are both zero-extended.
+ if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
+ Result.push_back(Lo);
+ Result.push_back(Hi);
+ if (Opcode != ISD::MUL) {
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ Result.push_back(Zero);
+ Result.push_back(Zero);
+ }
+ return true;
+ }
+ }
+
+ if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
+ RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ // TODO non-MUL case?
+ if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
+ Result.push_back(Lo);
+ Result.push_back(Hi);
+ return true;
+ }
+ }
+
+ unsigned ShiftAmount = OuterBitSize - InnerBitSize;
+ EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
+ // FIXME getShiftAmountTy does not always return a sensible result when VT
+ // is an illegal type, and so the type may be too small to fit the shift
+ // amount. Override it with i32. The shift will have to be legalized.
+ ShiftAmountTy = MVT::i32;
+ }
+ SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
+
+ if (!LH.getNode() && !RH.getNode() &&
+ isOperationLegalOrCustom(ISD::SRL, VT) &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
+ LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
+ RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
+ RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
+ }
+
+ if (!LH.getNode())
+ return false;
+
+ if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
+ return false;
+
+ Result.push_back(Lo);
+
+ if (Opcode == ISD::MUL) {
+ RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ Result.push_back(Hi);
+ return true;
+ }
+
+ // Compute the full width result.
+ auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
+ };
+
+ SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
+ if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
+ return false;
+
+ // This is effectively the add part of a multiply-add of half-sized operands,
+ // so it cannot overflow.
+ Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
+
+ if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
+ return false;
+
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
+ isOperationLegalOrCustom(ISD::ADDE, VT));
+ if (UseGlue)
+ Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
+ Merge(Lo, Hi));
+ else
+ Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
+ Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
+
+ SDValue Carry = Next.getValue(1);
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
+
+ if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
+ return false;
+
+ if (UseGlue)
+ Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
+ Carry);
+ else
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
+ Zero, Carry);
+
+ Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
+
+ if (Opcode == ISD::SMUL_LOHI) {
+ SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
+ Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
+
+ NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
+ Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
+ }
+
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ return true;
+}
+
+bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
+ SelectionDAG &DAG, MulExpansionKind Kind,
+ SDValue LL, SDValue LH, SDValue RL,
+ SDValue RH) const {
+ SmallVector<SDValue, 2> Result;
+ bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
+ N->getOperand(0), N->getOperand(1), Result, HiLoVT,
+ DAG, Kind, LL, LH, RL, RH);
+ if (Ok) {
+ assert(Result.size() == 2);
+ Lo = Result[0];
+ Hi = Result[1];
+ }
+ return Ok;
+}
+
+bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+ return false;
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
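+  //
+  // For example, for i8 operands fshl(X, Y, 3) = (X << 3) | (Y >> 5). When
+  // Z % BW == 0 the opposite-direction shift amount would be BW (undefined),
+  // which is why the zero-shift case is handled with a select below.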
+ SDValue X = Node->getOperand(0);
+ SDValue Y = Node->getOperand(1);
+ SDValue Z = Node->getOperand(2);
+
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsFSHL = Node->getOpcode() == ISD::FSHL;
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Z.getValueType();
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+ SDValue Zero = DAG.getConstant(0, DL, ShVT);
+
+ SDValue ShAmt;
+ if (isPowerOf2_32(EltSizeInBits)) {
+ SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
+ } else {
+ ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
+ }
+
+ SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
+ SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
+
+ // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
+ // and that is undefined. We must compare and select to avoid UB.
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
+
+ // For fshl, 0-shift returns the 1st arg (X).
+ // For fshr, 0-shift returns the 2nd arg (Y).
+ SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
+ Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
+ return true;
+}
+
+// TODO: Merge with expandFunnelShift.
+bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsLeft = Node->getOpcode() == ISD::ROTL;
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Op1.getValueType();
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+
+ // If a rotate in the other direction is legal, use it.
+ unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
+ if (isOperationLegal(RevRot, VT)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
+ return true;
+ }
+
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ return false;
+
+ // Otherwise,
+ // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
+ // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
+ //
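+  // For example, for i8 (w = 8), c = 0 gives shift amounts (0 & 7) = 0 and
+  // ((8 - 0) & 7) = 0, so the expansion degenerates to (or x, x) = x; the
+  // masking is what makes the c == 0 case well-defined, and it requires the
+  // bitwidth to be a power of 2 (asserted below).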
+ assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
+ "Expecting the type bitwidth to be a power of 2");
+ unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
+ unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
+ SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
+ SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
+ Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
+ DAG.getNode(HsOpc, DL, VT, Op0, And1));
+ return true;
+}
+
+bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+ SDLoc dl(SDValue(Node, 0));
+
+ // FIXME: Only f32 to i64 conversions are supported.
+ if (SrcVT != MVT::f32 || DstVT != MVT::i64)
+ return false;
+
+ if (Node->isStrictFPOpcode())
+ // When a NaN is converted to an integer a trap is allowed. We can't
+ // use this expansion here because it would eliminate that trap. Other
+ // traps are also allowed and cannot be eliminated. See
+ // IEEE 754-2008 sec 5.8.
+ return false;
+
+ // Expand f32 -> i64 conversion
+ // This algorithm comes from compiler-rt's implementation of fixsfdi:
+ // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
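+  //
+  // Sketch of the decoding for a concrete input: 1.0f is 0x3F800000, i.e.
+  // biased exponent 127 and mantissa 0 with the implicit bit OR'd in below
+  // (R = 0x800000). Its unbiased exponent 0 is <= 23, so R is shifted right
+  // by 23 - 0 = 23, yielding the i64 value 1 once the sign is folded in.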
+ unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
+ EVT IntVT = SrcVT.changeTypeToInteger();
+ EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
+
+ SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
+ SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
+ SDValue Bias = DAG.getConstant(127, dl, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
+ SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
+ SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
+
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
+
+ SDValue ExponentBits = DAG.getNode(
+ ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
+ SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
+
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
+
+ SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, dl, IntVT));
+
+ R = DAG.getZExtOrTrunc(R, dl, DstVT);
+
+ R = DAG.getSelectCC(
+ dl, Exponent, ExponentLoBit,
+ DAG.getNode(ISD::SHL, dl, DstVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
+ dl, IntShVT)),
+ DAG.getNode(ISD::SRL, dl, DstVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
+ dl, IntShVT)),
+ ISD::SETGT);
+
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
+ DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
+
+ Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
+ DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
+ return true;
+}
+
+bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
+ SelectionDAG &DAG) const {
+ SDLoc dl(SDValue(Node, 0));
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
+
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
+ ISD::FP_TO_SINT;
+ if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
+ return false;
+
+  // If the maximum float value is smaller than the signed integer range,
+ // the destination signmask can't be represented by the float, so we can
+ // just use FP_TO_SINT directly.
+ const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
+ APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
+ APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
+ if (APFloat::opOverflow &
+ APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
+ if (Node->isStrictFPOpcode()) {
+ Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), Src });
+ Chain = Result.getValue(1);
+ } else
+ Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ return true;
+ }
+
+ SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
+ SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+
+ bool Strict = Node->isStrictFPOpcode() ||
+ shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+
+ if (Strict) {
+ // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
+ // signmask then offset (the result of which should be fully representable).
+ // Sel = Src < 0x8000000000000000
+ // Val = select Sel, Src, Src - 0x8000000000000000
+ // Ofs = select Sel, 0, 0x8000000000000000
+ // Result = fp_to_sint(Val) ^ Ofs
+
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ SDValue SrcBiased;
+ if (Node->isStrictFPOpcode())
+ SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
+ { Node->getOperand(0), Src, Cst });
+ else
+ SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
+ SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
+ SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
+ DAG.getConstant(SignMask, dl, DstVT));
+ SDValue SInt;
+ if (Node->isStrictFPOpcode()) {
+ SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { SrcBiased.getValue(1), Val });
+ Chain = SInt.getValue(1);
+ } else
+ SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
+ Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
+ } else {
+ // Expand based on maximum range of FP_TO_SINT:
+ // True = fp_to_sint(Src)
+ // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
+ // Result = select (Src < 0x8000000000000000), True, False
+
+ SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
+ DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ False = DAG.getNode(ISD::XOR, dl, DstVT, False,
+ DAG.getConstant(SignMask, dl, DstVT));
+ Result = DAG.getSelect(dl, DstVT, Sel, True, False);
+ }
+ return true;
+}
+
+bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+
+ if (SrcVT.getScalarType() != MVT::i64)
+ return false;
+
+ SDLoc dl(SDValue(Node, 0));
+ EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
+
+ if (DstVT.getScalarType() == MVT::f32) {
+ // Only expand vector types if we have the appropriate vector bit
+ // operations.
+ if (SrcVT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
+ return false;
+
+ // For unsigned conversions, convert them to signed conversions using the
+    // algorithm from the x86_64 __floatundisf in compiler_rt.
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
+
+ SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ SDValue SignBitTest = DAG.getSetCC(
+ dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+ Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
+ return true;
+ }
+
+ if (DstVT.getScalarType() == MVT::f64) {
+ // Only expand vector types if we have the appropriate vector bit
+ // operations.
+ if (SrcVT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
+ return false;
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
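+    //
+    // Sketch: with Hi/Lo the 32-bit halves of the input, OR'ing Lo into the
+    // bit pattern of 2^52 produces a double exactly equal to 2^52 + Lo, and
+    // OR'ing Hi into the pattern of 2^84 produces 2^84 + Hi * 2^32, so
+    //   (HiFlt - (2^84 + 2^52)) + LoFlt == Hi * 2^32 + Lo
+    // with only the final FADD rounding.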
+ SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
+ SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
+ BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
+ SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
+ SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
+ SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
+
+ SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
+ SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
+ SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ return true;
+ }
+
+ return false;
+}
+
+SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
+ ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ EVT VT = Node->getValueType(0);
+ if (isOperationLegalOrCustom(NewOp, VT)) {
+ SDValue Quiet0 = Node->getOperand(0);
+ SDValue Quiet1 = Node->getOperand(1);
+
+ if (!Node->getFlags().hasNoNaNs()) {
+      // Insert canonicalize nodes if we may need to quiet the inputs to get
+      // correct sNaN behavior.
+ if (!DAG.isKnownNeverSNaN(Quiet0)) {
+ Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
+ Node->getFlags());
+ }
+ if (!DAG.isKnownNeverSNaN(Quiet1)) {
+ Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
+ Node->getFlags());
+ }
+ }
+
+ return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
+ }
+
+  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM, use those
+  // instead when the no-NaNs flag guarantees NaNs cannot occur.
+ if (Node->getFlags().hasNoNaNs()) {
+ unsigned IEEE2018Op =
+ Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
+ if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
+ return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
+ Node->getOperand(1), Node->getFlags());
+ }
+ }
+
+ return SDValue();
+}
+
+bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ unsigned Len = VT.getScalarSizeInBits();
+ assert(VT.isInteger() && "CTPOP not implemented for this type.");
+
+ // TODO: Add support for irregular type lengths.
+ if (!(Len <= 128 && Len % 8 == 0))
+ return false;
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ return false;
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
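+  //
+  // Worked example for i32 v = 0xFFFFFFFF: the three masked steps yield
+  // 0xAAAAAAAA, 0x44444444 and 0x08080808 (2-, 4- and 8-bit field counts),
+  // and 0x08080808 * 0x01010101 = 0x20181008, whose top byte (0x20 = 32)
+  // is the popcount after the final shift by Len - 8 = 24.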
+ SDValue Mask55 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+ SDValue Mask33 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
+ SDValue Mask0F =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, dl, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, dl, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, dl, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ if (Len > 8)
+ Op =
+ DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, dl, ShVT));
+
+ Result = Op;
+ return true;
+}
+
+bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
+ isOperationLegalOrCustom(ISD::CTLZ, VT)) {
+ Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
+ return true;
+ }
+
+ // If the ZERO_UNDEF version is supported use that and handle the zero case.
+ if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
+ return true;
+ }
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+ return false;
+
+ // For now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >> 16);
+ // x = x | (x >> 32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // Ref: "Hacker's Delight" by Henry Warren
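+ // Example for an 8-bit element x = 0x08: the OR cascade smears the top set
+ // bit downwards, giving x = 0x0F; then ~x = 0xF0 and popcount(0xF0) = 4,
+ // which is ctlz(0x08) for an 8-bit value.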
+ for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
+ SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ return true;
+}
+
+bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Op = Node->getOperand(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
+ isOperationLegalOrCustom(ISD::CTTZ, VT)) {
+ Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
+ return true;
+ }
+
+ // If the ZERO_UNDEF version is supported use that and handle the zero case.
+ if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
+ return true;
+ }
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return false;
+
+ // For now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return bitwidth - ctlz(~x & (x - 1)); }
+ // Ref: "Hacker's Delight" by Henry Warren
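+ // Example for an 8-bit element x = 0x68 (0b01101000): x - 1 = 0b01100111
+ // and ~x = 0b10010111, so ~x & (x - 1) = 0b00000111, whose popcount of 3
+ // matches cttz(0x68).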
+ SDValue Tmp = DAG.getNode(
+ ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
+
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
+ Result =
+ DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
+ return true;
+ }
+
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
+ return true;
+}
+
+bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = N->getOperand(0);
+
+ // Only expand vector types if we have the appropriate vector operations.
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
+ !isOperationLegalOrCustom(ISD::ADD, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return false;
+
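+ // abs(x) -> (x + (x >>s (bitwidth - 1))) ^ (x >>s (bitwidth - 1)).
+ // The arithmetic shift yields 0 for non-negative x and -1 for negative x,
+ // so the ADD/XOR pair is a no-op for the former and a two's complement
+ // negation for the latter.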
+ SDValue Shift =
+ DAG.getNode(ISD::SRA, dl, VT, Op,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
+ Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
+ return true;
+}
+
+SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
+ SelectionDAG &DAG) const {
+ SDLoc SL(LD);
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ unsigned NumElem = SrcVT.getVectorNumElements();
+
+ EVT SrcEltVT = SrcVT.getScalarType();
+ EVT DstEltVT = LD->getValueType(0).getScalarType();
+
+ unsigned Stride = SrcEltVT.getSizeInBits() / 8;
+ assert(SrcEltVT.isByteSized());
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue ScalarLoad =
+ DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+
+ BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
+
+ Vals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
+ SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
+
+ return DAG.getMergeValues({Value, NewChain}, SL);
+}
+
+SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
+ SelectionDAG &DAG) const {
+ SDLoc SL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
+ unsigned NumElem = StVT.getVectorNumElements();
+
+ // A vector must always be stored in memory as-is, i.e. without any padding
+ // between the elements, since various code depends on it, e.g. in the
+ // handling of a bitcast of a vector type to int, which may be done with a
+ // vector store followed by an integer load. A vector that does not have
+ // byte-sized elements must therefore be stored as an integer built out of
+ // the extracted vector elements.
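+ // E.g. an <8 x i1> store becomes an i8 built by shifting each extracted
+ // bit into its lane position and OR'ing, followed by a single i8 store.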
+ if (!MemSclVT.isByteSized()) {
+ unsigned NumBits = StVT.getSizeInBits();
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+
+ SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
+
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
+ DAG.getConstant(Idx, SL, IdxVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
+ SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
+ unsigned ShiftIntoIdx =
+ (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
+ SDValue ShiftAmount =
+ DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
+ SDValue ShiftedElt =
+ DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
+ CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
+ }
+
+ return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
+ ST->getAlignment(), ST->getMemOperand()->getFlags(),
+ ST->getAAInfo());
+ }
+
+ // Store stride, in bytes.
+ unsigned Stride = MemSclVT.getSizeInBits() / 8;
+ assert(Stride && "Zero stride!");
+ // Extract each of the elements from the original vector and save them into
+ // memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
+ DAG.getConstant(Idx, SL, IdxVT));
+
+ SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
+
+ // This scalar TruncStore may be illegal, but we legalize it later.
+ SDValue Store = DAG.getTruncStore(
+ Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
+ MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+
+ Stores.push_back(Store);
+ }
+
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
+}
+
+std::pair<SDValue, SDValue>
+TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
+ assert(LD->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed loads not implemented!");
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ auto &MF = DAG.getMachineFunction();
+
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+ if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
+ if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
+ LoadedVT.isVector()) {
+ // Scalarize the load and let the individual components be handled.
+ SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
+ if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
+ return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
+ return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
+ }
+
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
+ LD->getMemOperand());
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
+ if (LoadedVT != VT)
+ Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
+ ISD::ANY_EXTEND, dl, VT, Result);
+
+ return std::make_pair(Result, newLoad.getValue(1));
+ }
+
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do an (aligned) load from the stack slot.
+ MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getStoreSize();
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+ auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ EVT PtrVT = Ptr.getValueType();
+ EVT StackPtrVT = StackPtr.getValueType();
+
+ SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
+ SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
+
+ // Do all but one copy using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
+ StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset), MemVT,
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
+ LoadedVT);
+
+ // Callers expect a MERGE_VALUES node.
+ return std::make_pair(Load, TF);
+ }
+
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer type.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ EVT NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (DAG.getDataLayout().isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, MinAlign(Alignment, IncrementSize),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, MinAlign(Alignment, IncrementSize),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ }
+
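+ // E.g. an unaligned i32 load on little-endian becomes a zext i16 load of
+ // the low half at Ptr and an ext load of the high half at Ptr + 2; the
+ // result is (Hi << 16) | Lo.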
+ // Aggregate the two parts.
+ SDValue ShiftAmount =
+ DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
+ DAG.getDataLayout()));
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ return std::make_pair(Result, TF);
+}
+
+SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
+ SelectionDAG &DAG) const {
+ assert(ST->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed stores not implemented!");
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ EVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+ auto &MF = DAG.getMachineFunction();
+ EVT StoreMemVT = ST->getMemoryVT();
+
+ SDLoc dl(ST);
+ if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (isTypeLegal(intVT)) {
+ if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
+ StoreMemVT.isVector()) {
+ // Scalarize the store and let the individual components be handled.
+ SDValue Result = scalarizeVectorStore(ST, DAG);
+ return Result;
+ }
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ Alignment, ST->getMemOperand()->getFlags());
+ return Result;
+ }
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ MVT RegVT = getRegisterType(
+ *DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
+ EVT PtrVT = Ptr.getValueType();
+ unsigned StoredBytes = StoreMemVT.getStoreSize();
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(
+ Chain, dl, Val, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
+
+ EVT StackPtrVT = StackPtr.getValueType();
+
+ SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
+ SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one copy using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ MinAlign(ST->getAlignment(), Offset),
+ ST->getMemOperand()->getFlags()));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT LoadMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
+
+ Stores.push_back(
+ DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
+ MinAlign(ST->getAlignment(), Offset),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo()));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ return Result;
+ }
+
+ assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
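+ // E.g. an unaligned i32 store is emitted as two i16 truncating stores:
+ // Val supplies the low half and (Val >> 16) the high half, written in
+ // memory order according to endianness below.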
+ // Divide the stored value into two parts.
+ SDValue ShiftAmount = DAG.getConstant(
+ NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl,
+ DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
+ Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
+ ST->getMemOperand()->getFlags());
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(
+ Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ return Result;
+}
+
+SDValue
+TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
+ const SDLoc &DL, EVT DataVT,
+ SelectionDAG &DAG,
+ bool IsCompressedMemory) const {
+ SDValue Increment;
+ EVT AddrVT = Addr.getValueType();
+ EVT MaskVT = Mask.getValueType();
+ assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
+ "Incompatible types of Data and Mask");
+ if (IsCompressedMemory) {
+ // Increment the pointer according to the number of '1's in the mask.
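+ // E.g. for a v4i32 compressed store with mask <1,0,1,1>, three elements
+ // are written, so the address advances by 3 * 4 = 12 bytes.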
+ EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
+ SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
+ if (MaskIntVT.getSizeInBits() < 32) {
+ MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
+ MaskIntVT = MVT::i32;
+ }
+
+ // Count '1's with POPCNT.
+ Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
+ Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
+ // Scale is an element size in bytes.
+ SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
+ AddrVT);
+ Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
+ } else
+ Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
+
+ return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
+}
+
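+/// Clamp a possibly-dynamic index into the legal range for VecVT. With a
+/// power-of-two element count N this is Idx & (N - 1), otherwise
+/// umin(Idx, N - 1). E.g. for 8 elements, Idx & 7; for 3 elements,
+/// umin(Idx, 2).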
+static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
+ SDValue Idx,
+ EVT VecVT,
+ const SDLoc &dl) {
+ if (isa<ConstantSDNode>(Idx))
+ return Idx;
+
+ EVT IdxVT = Idx.getValueType();
+ unsigned NElts = VecVT.getVectorNumElements();
+ if (isPowerOf2_32(NElts)) {
+ APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
+ Log2_32(NElts));
+ return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
+ DAG.getConstant(Imm, dl, IdxVT));
+ }
+
+ return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
+ DAG.getConstant(NElts - 1, dl, IdxVT));
+}
+
+SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
+ SDValue VecPtr, EVT VecVT,
+ SDValue Index) const {
+ SDLoc dl(Index);
+ // Make sure the index type is big enough to compute in.
+ Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
+
+ EVT EltVT = VecVT.getVectorElementType();
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+ assert(EltSize * 8 == EltVT.getSizeInBits() &&
+ "Converting bits to bytes lost precision");
+
+ Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
+
+ EVT IdxVT = Index.getValueType();
+
+ Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+ DAG.getConstant(EltSize, dl, IdxVT));
+ return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
+}
+
+//===----------------------------------------------------------------------===//
+// Implementation of Emulated TLS Model
+//===----------------------------------------------------------------------===//
+
+SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ // Access to the address of TLS variable xyz is lowered to a function call:
+ // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
+ SDLoc dl(GA);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
+ Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
+ StringRef EmuTlsVarName(NameString);
+ GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
+ assert(EmuTlsVar && "Cannot find EmuTlsVar");
+ Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
+ Entry.Ty = VoidPtrType;
+ Args.push_back(Entry);
+
+ SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
+ CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
+ // calls. At least needed for X86 targets; maybe good for other targets too?
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setAdjustsStack(true); // Is this only needed for the X86 target?
+ MFI.setHasCalls(true);
+
+ assert((GA->getOffset() == 0) &&
+ "Emulated TLS must have zero offset in GlobalAddressSDNode");
+ return CallResult.first;
+}
+
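+// Lower (seteq X, 0) to (srl (ctlz X), log2(bitwidth)): ctlz returns the
+// bit width exactly when X == 0, and that is the only ctlz result whose
+// log2(bitwidth) bit is set.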
+SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
+ if (!isCtlzFast())
+ return SDValue();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc dl(Op);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ }
+ return SDValue();
+}
+
+SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ SDLoc dl(Node);
+
+ assert(VT == RHS.getValueType() && "Expected operands to be the same type");
+ assert(VT.isInteger() && "Expected operands to be integers");
+
+ // usub.sat(a, b) -> umax(a, b) - b
+ if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
+ SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
+ return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
+ }
+
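+ // uadd.sat(a, b) -> umin(a, ~b) + b
+ // If a <= ~b then a + b cannot wrap, otherwise the result clamps to
+ // ~b + b = all-ones.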
+ if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
+ SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
+ SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
+ return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
+ }
+
+ unsigned OverflowOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ OverflowOp = ISD::SADDO;
+ break;
+ case ISD::UADDSAT:
+ OverflowOp = ISD::UADDO;
+ break;
+ case ISD::SSUBSAT:
+ OverflowOp = ISD::SSUBO;
+ break;
+ case ISD::USUBSAT:
+ OverflowOp = ISD::USUBO;
+ break;
+ default:
+ llvm_unreachable("Expected method to receive signed or unsigned saturation "
+ "addition or subtraction node.");
+ }
+
+ unsigned BitWidth = LHS.getScalarValueSizeInBits();
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
+ LHS, RHS);
+ SDValue SumDiff = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
+
+ if (Opcode == ISD::UADDSAT) {
+ if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ // (LHS + RHS) | OverflowMask
+ SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+ return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
+ }
+ // Overflow ? 0xffff.... : (LHS + RHS)
+ return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
+ } else if (Opcode == ISD::USUBSAT) {
+ if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ // (LHS - RHS) & ~OverflowMask
+ SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+ SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
+ return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
+ }
+ // Overflow ? 0 : (LHS - RHS)
+ return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
+ } else {
+ // SatMax -> Overflow && SumDiff < 0
+ // SatMin -> Overflow && SumDiff >= 0
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
+ }
+}
+
+SDValue
+TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
+ assert((Node->getOpcode() == ISD::SMULFIX ||
+ Node->getOpcode() == ISD::UMULFIX ||
+ Node->getOpcode() == ISD::SMULFIXSAT ||
+ Node->getOpcode() == ISD::UMULFIXSAT) &&
+ "Expected a fixed point multiplication opcode");
+
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ unsigned Scale = Node->getConstantOperandVal(2);
+ bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
+ Node->getOpcode() == ISD::UMULFIXSAT);
+ bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
+ Node->getOpcode() == ISD::SMULFIXSAT);
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ unsigned VTSize = VT.getScalarSizeInBits();
+
+ if (!Scale) {
+ // [us]mul.fix(a, b, 0) -> mul(a, b)
+ if (!Saturating) {
+ if (isOperationLegalOrCustom(ISD::MUL, VT))
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, VT, Overflow, Result, Product);
+ } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
+ }
+ }
+
+ assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
+ "Expected scale to be less than the number of bits if signed or at "
+ "most the number of bits if unsigned.");
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected both operands to be the same type");
+
+ // Get the upper and lower bits of the result.
+ SDValue Lo, Hi;
+ unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
+ if (isOperationLegalOrCustom(LoHiOp, VT)) {
+ SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ Lo = Result.getValue(0);
+ Hi = Result.getValue(1);
+ } else if (isOperationLegalOrCustom(HiOp, VT)) {
+ Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
+ } else if (VT.isVector()) {
+ return SDValue();
+ } else {
+ report_fatal_error("Unable to expand fixed point multiplication.");
+ }
+
+ if (Scale == VTSize)
+ // Result is just the top half since we'd be shifting by the width of the
+ // operand. Overflow is impossible here, so this works for both UMULFIX
+ // and UMULFIXSAT.
+ return Hi;
+
+ // The result will need to be shifted right by the scale since both operands
+ // are scaled. The result is given to us in 2 halves, so we only want part of
+ // both in the result.
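+ // E.g. smul.fix i8 with Scale = 4 (Q4.4): 1.5 (0x18) * 2.5 (0x28) gives
+ // the i16 product 0x03C0; shifting the Hi:Lo pair right by 4 yields
+ // 0x3C = 3.75 in Q4.4.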
+ EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
+ DAG.getConstant(Scale, dl, ShiftTy));
+ if (!Saturating)
+ return Result;
+
+ if (!Signed) {
+ // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
+ // widened multiplication) aren't all zeroes.
+
+ // Saturate to max if ((Hi >> Scale) != 0),
+ // which is the same as if (Hi > ((1 << Scale) - 1))
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, LowMask,
+ DAG.getConstant(MaxVal, dl, VT), Result,
+ ISD::SETUGT);
+
+ return Result;
+ }
+
+ // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
+ // widened multiplication) aren't all ones or all zeroes.
+
+ SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
+ SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
+
+ if (Scale == 0) {
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
+ DAG.getConstant(VTSize - 1, dl, ShiftTy));
+ SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
+ // Saturate to SatMin if the wide product is negative, and to SatMax if
+ // the wide product is positive ...
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
+ ISD::SETLT);
+ // ... but only if we overflowed.
+ return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
+ }
+
+ // We handled Scale == 0 above, so all the bits to examine are in Hi.
+
+ // Saturate to max if ((Hi >> (Scale - 1)) > 0),
+ // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
+ SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
+ // Saturate to min if ((Hi >> (Scale - 1)) < -1),
+ // which is the same as if (Hi < (-1 << (Scale - 1))).
+ SDValue HighMask =
+ DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
+ return Result;
+}
+
+void TargetLowering::expandUADDSUBO(
+ SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool IsAdd = Node->getOpcode() == ISD::UADDO;
+
+ // If ADD/SUBCARRY is legal, use that instead.
+ unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
+ if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
+ SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
+ SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
+ { LHS, RHS, CarryIn });
+ Result = SDValue(NodeCarry.getNode(), 0);
+ Overflow = SDValue(NodeCarry.getNode(), 1);
+ return;
+ }
+
+ Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+ LHS.getValueType(), LHS, RHS);
+
+ EVT ResultType = Node->getValueType(1);
+ EVT SetCCType = getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
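+ // Unsigned addition wrapped iff the result is (unsigned) less than LHS;
+ // unsigned subtraction wrapped iff the result is greater than LHS.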
+ ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+ SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
+ Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+}
+
+void TargetLowering::expandSADDSUBO(
+ SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool IsAdd = Node->getOpcode() == ISD::SADDO;
+
+ Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+ LHS.getValueType(), LHS, RHS);
+
+ EVT ResultType = Node->getValueType(1);
+ EVT OType = getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+
+ // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
+ unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
+ if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
+ SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
+ SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
+ Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+ return;
+ }
+
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+ // For an addition, the result should be less than one of the operands (LHS)
+ // if and only if the other operand (RHS) is negative, otherwise there will
+ // be overflow.
+ // For a subtraction, the result should be less than one of the operands
+ // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+ // otherwise there will be overflow.
+ SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
+ SDValue ConditionRHS =
+ DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
+
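+ // Overflow therefore occurred iff exactly one of the two conditions holds,
+ // i.e. their XOR is true.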
+ Overflow = DAG.getBoolExtOrTrunc(
+ DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
+ ResultType, ResultType);
+}
+
+bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
+ SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+
+ // For power-of-two multiplications we can use a simpler shift expansion.
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
+ const APInt &C = RHSC->getAPIntValue();
+ // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
+ if (C.isPowerOf2()) {
+ // smulo(x, signed_min) is same as umulo(x, signed_min).
+ bool UseArithShift = isSigned && !C.isMinSignedValue();
+ EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
+ Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
+ Overflow = DAG.getSetCC(dl, SetCCVT,
+ DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
+ dl, VT, Result, ShiftAmt),
+ LHS, ISD::SETNE);
+ return true;
+ }
+ }
+
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorNumElements());
+
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static const unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (isTypeLegal(WideVT)) {
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
+ getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
+ } else {
+ if (VT.isVector())
+ return false;
+
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a big enough libcall.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+ SDValue HiLHS;
+ SDValue HiRHS;
+ if (isSigned) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // low part, i.e. by replicating the sign bit.
+ unsigned LoSize = VT.getSizeInBits();
+ HiLHS =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize - 1, dl,
+ getPointerTy(DAG.getDataLayout())));
+ HiRHS =
+ DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize - 1, dl,
+ getPointerTy(DAG.getDataLayout())));
+ } else {
+ HiLHS = DAG.getConstant(0, dl, VT);
+ HiRHS = DAG.getConstant(0, dl, VT);
+ }
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+ // pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the architecture, and thus having to be split
+ // into two arguments.
+ SDValue Ret;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ CallOptions.setIsPostTypeLegalization(true);
+ if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
+ // Halves of WideVT are packed into registers in different order
+ // depending on platform endianness. This is usually handled by
+ // the C calling convention, but we can't defer to it in
+ // the legalizer.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
+ } else {
+ SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
+ }
+ assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
+ "Ret value is a collection of constituent nodes holding result.");
+ if (DAG.getDataLayout().isLittleEndian()) {
+ // As above, the order of the two halves depends on endianness.
+ BottomHalf = Ret.getOperand(0);
+ TopHalf = Ret.getOperand(1);
+ } else {
+ BottomHalf = Ret.getOperand(1);
+ TopHalf = Ret.getOperand(0);
+ }
+ }
+
+ Result = BottomHalf;
+ if (isSigned) {
+ SDValue ShiftAmt = DAG.getConstant(
+ VT.getScalarSizeInBits() - 1, dl,
+ getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
+ Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
+ } else {
+ Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
+ DAG.getConstant(0, dl, VT), ISD::SETNE);
+ }
+
+ // Truncate the result if SetCC returns a larger type than needed.
+ EVT RType = Node->getValueType(1);
+ if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
+ Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
+
+ assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
+ "Unexpected result type for S/UMULO legalization");
+ return true;
+}
+
+SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ bool NoNaN = Node->getFlags().hasNoNaNs();
+ unsigned BaseOpcode = 0;
+ switch (Node->getOpcode()) {
+ default: llvm_unreachable("Expected VECREDUCE opcode");
+ case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
+ case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
+ case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break;
+ case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
+ case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
+ case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
+ case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
+ case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
+ case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
+ case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
+ case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
+ case ISD::VECREDUCE_FMAX:
+ BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
+ break;
+ case ISD::VECREDUCE_FMIN:
+ BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
+ break;
+ }
+
+ SDValue Op = Node->getOperand(0);
+ EVT VT = Op.getValueType();
+
+ // Try to use a shuffle reduction for power of two vectors.
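+ // E.g. a v8i32 ADD reduction becomes: split into two v4i32 halves and ADD,
+ // then split into two v2i32 halves and ADD, typically leaving a v2i32
+ // whose lanes are combined by the scalar loop below.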
+ if (VT.isPow2VectorType()) {
+ while (VT.getVectorNumElements() > 1) {
+ EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+ if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
+ break;
+
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
+ Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
+ VT = HalfVT;
+ }
+ }
+
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Ops;
+ DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
+
+ SDValue Res = Ops[0];
+ for (unsigned i = 1; i < NumElts; i++)
+ Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
+
+ // Result type may be wider than element type.
+ if (EltVT != Node->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
+ return Res;
+}