author     Ed Schouten <ed@FreeBSD.org>    2009-06-02 17:52:33 +0000
committer  Ed Schouten <ed@FreeBSD.org>    2009-06-02 17:52:33 +0000
commit     009b1c42aa6266385f2c37e227516b24077e6dd7 (patch)
tree       64ba909838c23261cace781ece27d106134ea451 /lib/CodeGen/SelectionDAG
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
25 files changed, 39316 insertions, 0 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt new file mode 100644 index 000000000000..9ea59ea80c61 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -0,0 +1,22 @@ +add_llvm_library(LLVMSelectionDAG + CallingConvLower.cpp + DAGCombiner.cpp + FastISel.cpp + LegalizeDAG.cpp + LegalizeFloatTypes.cpp + LegalizeIntegerTypes.cpp + LegalizeTypes.cpp + LegalizeTypesGeneric.cpp + LegalizeVectorOps.cpp + LegalizeVectorTypes.cpp + ScheduleDAGSDNodes.cpp + ScheduleDAGSDNodesEmit.cpp + ScheduleDAGFast.cpp + ScheduleDAGList.cpp + ScheduleDAGRRList.cpp + SelectionDAGBuild.cpp + SelectionDAG.cpp + SelectionDAGISel.cpp + SelectionDAGPrinter.cpp + TargetLowering.cpp + ) diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp new file mode 100644 index 000000000000..7cd2b73e8704 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp @@ -0,0 +1,148 @@ +//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CCState class, used for lowering and implementing +// calling conventions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, + SmallVector<CCValAssign, 16> &locs) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +// HandleByVal - Allocate a stack slot large enough to pass an argument by +// value. The size and alignment information of the argument is encoded in its +// parameter attribute. +void CCState::HandleByVal(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + unsigned Offset = AllocateStack(Size, Align); + + addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void CCState::MarkAllocated(unsigned Reg) { + UsedRegs[Reg/32] |= 1 << (Reg&31); + + if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) + for (; (Reg = *RegAliases); ++RegAliases) + UsedRegs[Reg/32] |= 1 << (Reg&31); +} + +/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, +/// incorporating info about the formals into this state. 
+void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) { + unsigned NumArgs = TheArgs->getNumValues()-1; + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = TheArgs->getValueType(i); + ISD::ArgFlagsTy ArgFlags = + cast<ARG_FLAGSSDNode>(TheArgs->getOperand(3+i))->getArgFlags(); + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Formal argument #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) { + MVT VT = TheRet->getOperand(i*2+1).getValueType(); + ISD::ArgFlagsTy ArgFlags = + cast<ARG_FLAGSSDNode>(TheRet->getOperand(i*2+2))->getArgFlags(); + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)){ + cerr << "Return operand #" << i << " has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } + } +} + + +/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info +/// about the passed values into this state. +void CCState::AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn) { + unsigned NumOps = TheCall->getNumArgs(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = TheCall->getArg(i).getValueType(); + ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn) { + for (unsigned i = 0, e = TheCall->getNumRetVals(); i != e; ++i) { + MVT VT = TheCall->getRetValType(i); + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (TheCall->isInreg()) + Flags.setInReg(); + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { + cerr << "Call result #" << i << " has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. 
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { + cerr << "Call result has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } +} diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp new file mode 100644 index 000000000000..4c1710dd81fa --- /dev/null +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -0,0 +1,6203 @@ +//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run +// both before and after the DAG is legalized. +// +// This pass is not a substitute for the LLVM IR instcombine pass. This pass is +// primarily intended to handle simplification opportunities that are implicit +// in the LLVM IR and exposed by the various codegen lowering phases. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dagcombine" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <set> +using namespace llvm; + +STATISTIC(NodesCombined , "Number of dag nodes combined"); +STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); +STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); +STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); + +namespace { + static cl::opt<bool> + CombinerAA("combiner-alias-analysis", cl::Hidden, + cl::desc("Turn on alias analysis during testing")); + + static cl::opt<bool> + CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, + cl::desc("Include global information in alias analysis")); + +//------------------------------ DAGCombiner ---------------------------------// + + class VISIBILITY_HIDDEN DAGCombiner { + SelectionDAG &DAG; + const TargetLowering &TLI; + CombineLevel Level; + CodeGenOpt::Level OptLevel; + bool LegalOperations; + bool LegalTypes; + + // Worklist of all of the nodes that need to be simplified. + std::vector<SDNode*> WorkList; + + // AA - Used for DAG load/store alias analysis. + AliasAnalysis &AA; + + /// AddUsersToWorkList - When an instruction is simplified, add all users of + /// the instruction to the work lists because they might get more simplified + /// now. + /// + void AddUsersToWorkList(SDNode *N) { + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) + AddToWorkList(*UI); + } + + /// visit - call the node-specific routine that knows how to fold each + /// particular type of node. 
+ SDValue visit(SDNode *N); + + public: + /// AddToWorkList - Add to the work list making sure it's instance is at the + /// the back (next to be processed.) + void AddToWorkList(SDNode *N) { + removeFromWorkList(N); + WorkList.push_back(N); + } + + /// removeFromWorkList - remove all instances of N from the worklist. + /// + void removeFromWorkList(SDNode *N) { + WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), + WorkList.end()); + } + + SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo = true); + + SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { + return CombineTo(N, &Res, 1, AddTo); + } + + SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, + bool AddTo = true) { + SDValue To[] = { Res0, Res1 }; + return CombineTo(N, To, 2, AddTo); + } + + void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); + + private: + + /// SimplifyDemandedBits - Check the specified integer node value to see if + /// it can be simplified or if things it uses can be simplified by bit + /// propagation. If so, return true. + bool SimplifyDemandedBits(SDValue Op) { + APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits()); + return SimplifyDemandedBits(Op, Demanded); + } + + bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); + + bool CombineToPreIndexedLoadStore(SDNode *N); + bool CombineToPostIndexedLoadStore(SDNode *N); + + + /// combine - call the node-specific routine that knows how to fold each + /// particular type of node. If that doesn't do anything, try the + /// target-specific DAG combines. + SDValue combine(SDNode *N); + + // Visitation implementation - Implement dag node combining for different + // node types. The semantics are as follows: + // Return Value: + // SDValue.getNode() == 0 - No change was made + // SDValue.getNode() == N - N was replaced, is dead and has been handled. + // otherwise - N should be replaced by the returned Operand. 
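  // (Gloss on this contract, using a hypothetical visitFOO as the example:
  //  returning SDValue() leaves N untouched; calling CombineTo and then
  //  returning SDValue(N, 0) signals that N's values were already replaced
  //  in place; returning any other SDValue asks the caller to replace all
  //  uses of N with that value.)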
+ // + SDValue visitTokenFactor(SDNode *N); + SDValue visitMERGE_VALUES(SDNode *N); + SDValue visitADD(SDNode *N); + SDValue visitSUB(SDNode *N); + SDValue visitADDC(SDNode *N); + SDValue visitADDE(SDNode *N); + SDValue visitMUL(SDNode *N); + SDValue visitSDIV(SDNode *N); + SDValue visitUDIV(SDNode *N); + SDValue visitSREM(SDNode *N); + SDValue visitUREM(SDNode *N); + SDValue visitMULHU(SDNode *N); + SDValue visitMULHS(SDNode *N); + SDValue visitSMUL_LOHI(SDNode *N); + SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitSDIVREM(SDNode *N); + SDValue visitUDIVREM(SDNode *N); + SDValue visitAND(SDNode *N); + SDValue visitOR(SDNode *N); + SDValue visitXOR(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N); + SDValue visitSHL(SDNode *N); + SDValue visitSRA(SDNode *N); + SDValue visitSRL(SDNode *N); + SDValue visitCTLZ(SDNode *N); + SDValue visitCTTZ(SDNode *N); + SDValue visitCTPOP(SDNode *N); + SDValue visitSELECT(SDNode *N); + SDValue visitSELECT_CC(SDNode *N); + SDValue visitSETCC(SDNode *N); + SDValue visitSIGN_EXTEND(SDNode *N); + SDValue visitZERO_EXTEND(SDNode *N); + SDValue visitANY_EXTEND(SDNode *N); + SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitTRUNCATE(SDNode *N); + SDValue visitBIT_CONVERT(SDNode *N); + SDValue visitBUILD_PAIR(SDNode *N); + SDValue visitFADD(SDNode *N); + SDValue visitFSUB(SDNode *N); + SDValue visitFMUL(SDNode *N); + SDValue visitFDIV(SDNode *N); + SDValue visitFREM(SDNode *N); + SDValue visitFCOPYSIGN(SDNode *N); + SDValue visitSINT_TO_FP(SDNode *N); + SDValue visitUINT_TO_FP(SDNode *N); + SDValue visitFP_TO_SINT(SDNode *N); + SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitFP_ROUND(SDNode *N); + SDValue visitFP_ROUND_INREG(SDNode *N); + SDValue visitFP_EXTEND(SDNode *N); + SDValue visitFNEG(SDNode *N); + SDValue visitFABS(SDNode *N); + SDValue visitBRCOND(SDNode *N); + SDValue visitBR_CC(SDNode *N); + SDValue visitLOAD(SDNode *N); + SDValue visitSTORE(SDNode *N); + SDValue visitINSERT_VECTOR_ELT(SDNode *N); + SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); + SDValue visitBUILD_VECTOR(SDNode *N); + SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitVECTOR_SHUFFLE(SDNode *N); + + SDValue XformToShuffleWithZero(SDNode *N); + SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + + SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + + bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); + SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); + SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + bool NotExtCompare = false); + SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + DebugLoc DL, bool foldBooleans = true); + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp); + SDValue CombineConsecutiveLoads(SDNode *N, MVT VT); + SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT); + SDValue BuildSDIV(SDNode *N); + SDValue BuildUDIV(SDNode *N); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDValue ReduceLoadWidth(SDNode *N); + SDValue ReduceLoadOpStoreWidth(SDNode *N); + + SDValue GetDemandedBits(SDValue V, const APInt &Mask); + + /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// looking for aliasing nodes and adding them to the Aliases vector. 
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector<SDValue, 8> &Aliases); + + /// isAlias - Return true if there is any possibility that the two addresses + /// overlap. + bool isAlias(SDValue Ptr1, int64_t Size1, + const Value *SrcValue1, int SrcValueOffset1, + SDValue Ptr2, int64_t Size2, + const Value *SrcValue2, int SrcValueOffset2) const; + + /// FindAliasInfo - Extracts the relevant alias information from the memory + /// node. Returns true if the operand was a load. + bool FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset) const; + + /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, + /// looking for a better chain (aliasing node.) + SDValue FindBetterChain(SDNode *N, SDValue Chain); + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + MVT getShiftAmountTy() { + return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + } + +public: + DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) + : DAG(D), + TLI(D.getTargetLoweringInfo()), + Level(Unrestricted), + OptLevel(OL), + LegalOperations(false), + LegalTypes(false), + AA(A) {} + + /// Run - runs the dag combiner on all nodes in the work list + void Run(CombineLevel AtLevel); + }; +} + + +namespace { +/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// nodes from the worklist. +class VISIBILITY_HIDDEN WorkListRemover : + public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; +public: + explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + DC.removeFromWorkList(N); + } + + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. + } +}; +} + +//===----------------------------------------------------------------------===// +// TargetLowering::DAGCombinerInfo implementation +//===----------------------------------------------------------------------===// + +void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { + ((DAGCombiner*)DC)->AddToWorkList(N); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); +} + + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); +} + +void TargetLowering::DAGCombinerInfo:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); +} + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isNegatibleForFree - Return 1 if we can compute the negated form of the +/// specified expression for the same cost as the expression itself, or 2 if we +/// can compute the negated form more cheaply than the expression itself. +static char isNegatibleForFree(SDValue Op, bool LegalOperations, + unsigned Depth = 0) { + // No compile time optimizations on this type. + if (Op.getValueType() == MVT::ppcf128) + return 0; + + // fneg is removable even if it has multiple uses. 
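  // (Reading of the return codes used below: 0 means the negated form cannot
  //  be computed for free, 1 means it costs the same as the original, and 2
  //  means it is strictly cheaper; an existing fneg is the cheaper case,
  //  since its negation is simply its operand and one node disappears.)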
+ if (Op.getOpcode() == ISD::FNEG) return 2; + + // Don't allow anything with multiple uses. + if (!Op.hasOneUse()) return 0; + + // Don't recurse exponentially. + if (Depth > 6) return 0; + + switch (Op.getOpcode()) { + default: return false; + case ISD::ConstantFP: + // Don't invert constant FP values after legalize. The negated constant + // isn't necessarily legal. + return LegalOperations ? 0 : 1; + case ISD::FADD: + // FIXME: determine better conditions for this xform. + if (!UnsafeFPMath) return 0; + + // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!UnsafeFPMath) return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + if (HonorSignDependentRoundingFPMath()) return 0; + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return V; + + return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1); + } +} + +/// GetNegatedExpression - If isNegatibleForFree returns true, this function +/// returns the newly negated expression. +static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, unsigned Depth = 0) { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); + + // Don't allow anything with multiple uses. + assert(Op.hasOneUse() && "Unknown reuse!"); + + assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + switch (Op.getOpcode()) { + default: assert(0 && "Unknown code"); + case ISD::ConstantFP: { + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, Op.getValueType()); + } + case ISD::FADD: + // FIXME: determine better conditions for this xform. + assert(UnsafeFPMath); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1), + Op.getOperand(0)); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. 
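    // (Concrete case: with A == B == +0.0, A-B evaluates to +0.0 and -(A-B)
    //  to -0.0, while B-A is +0.0; the rewrite flips the sign of zero, which
    //  is only acceptable under UnsafeFPMath.)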
+ assert(UnsafeFPMath); + + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) + if (N0CFP->getValueAPF().isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0)); + + case ISD::FMUL: + case ISD::FDIV: + assert(!HonorSignDependentRoundingFPMath()); + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(0), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1)); + + case ISD::FP_EXTEND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + } +} + + +// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc +// that selects between the values 1 and 0, making it equivalent to a setcc. +// Also, set the incoming LHS, RHS, and CC references to the appropriate +// nodes based on the type of node we are checking. This simplifies life a +// bit for the callers. +static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) { + if (N.getOpcode() == ISD::SETCC) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(2); + return true; + } + if (N.getOpcode() == ISD::SELECT_CC && + N.getOperand(2).getOpcode() == ISD::Constant && + N.getOperand(3).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && + cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; + } + return false; +} + +// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only +// one use. If this is true, it allows the users to invert the operation for +// free when it is profitable to do so. +static bool isOneUseSetCC(SDValue N) { + SDValue N0, N1, N2; + if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) + return true; + return false; +} + +SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, + SDValue N0, SDValue N1) { + MVT VT = N0.getValueType(); + if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { + if (isa<ConstantSDNode>(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast<ConstantSDNode>(N0.getOperand(1)), + cast<ConstantSDNode>(N1)); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + } else if (N0.hasOneUse()) { + // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N0.getOperand(0), N1); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + + if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { + if (isa<ConstantSDNode>(N0)) { + // reassoc. 
(op c2, (op x, c1)) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast<ConstantSDNode>(N1.getOperand(1)), + cast<ConstantSDNode>(N0)); + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + } else if (N1.hasOneUse()) { + // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N1.getOperand(0), N0); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo) { + assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); + ++NodesCombined; + DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG)); + DOUT << " and " << NumTo-1 << " other values\n"; + DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i) + assert(N->getValueType(i) == To[i].getValueType() && + "Cannot combine value to value of different type!")); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesWith(N, To, &DeadNodes); + + if (AddTo) { + // Push the new nodes and any users onto the worklist + for (unsigned i = 0, e = NumTo; i != e; ++i) { + if (To[i].getNode()) { + AddToWorkList(To[i].getNode()); + AddUsersToWorkList(To[i].getNode()); + } + } + } + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. + removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + return SDValue(N, 0); +} + +void +DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & + TLO) { + // Replace all uses. If any nodes become isomorphic to other nodes and + // are deleted, make sure to remove them from our worklist. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); + + // Push the new node and any (possibly new) users onto the worklist. + AddToWorkList(TLO.New.getNode()); + AddUsersToWorkList(TLO.New.getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (TLO.Old.getNode()->use_empty()) { + removeFromWorkList(TLO.Old.getNode()); + + // If the operands of this node are only used by the node, they will now + // be dead. Make sure to visit them first to delete dead nodes early. + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) + if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) + AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + + DAG.DeleteNode(TLO.Old.getNode()); + } +} + +/// SimplifyDemandedBits - Check the specified integer node value to see if +/// it can be simplified or if things it uses can be simplified by bit +/// propagation. If so, return true. +bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { + TargetLowering::TargetLoweringOpt TLO(DAG); + APInt KnownZero, KnownOne; + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + return false; + + // Revisit the node. + AddToWorkList(Op.getNode()); + + // Replace the old value with the new one. 
+ ++NodesCombined; + DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG)); + DOUT << '\n'; + + CommitTargetLoweringOpt(TLO); + return true; +} + +//===----------------------------------------------------------------------===// +// Main DAG Combiner implementation +//===----------------------------------------------------------------------===// + +void DAGCombiner::Run(CombineLevel AtLevel) { + // set the instance variables, so that the various visit routines may use it. + Level = AtLevel; + LegalOperations = Level >= NoIllegalOperations; + LegalTypes = Level >= NoIllegalTypes; + + // Add all the dag nodes to the worklist. + WorkList.reserve(DAG.allnodes_size()); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) + WorkList.push_back(I); + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + + // The root of the dag may dangle to deleted nodes until the dag combiner is + // done. Set it to null to avoid confusion. + DAG.setRoot(SDValue()); + + // while the worklist isn't empty, inspect the node on the end of it and + // try and combine it. + while (!WorkList.empty()) { + SDNode *N = WorkList.back(); + WorkList.pop_back(); + + // If N has no uses, it is dead. Make sure to revisit all N's operands once + // N is deleted from the DAG, since they too may now be dead or may have a + // reduced number of uses, allowing other xforms. + if (N->use_empty() && N != &Dummy) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).getNode()); + + DAG.DeleteNode(N); + continue; + } + + SDValue RV = combine(N); + + if (RV.getNode() == 0) + continue; + + ++NodesCombined; + + // If we get back the same node we passed in, rather than a new node or + // zero, we know that the node must have defined multiple values and + // CombineTo was used. Since CombineTo takes care of the worklist + // mechanics for us, we have no work to do in this case. + if (RV.getNode() == N) + continue; + + assert(N->getOpcode() != ISD::DELETED_NODE && + RV.getNode()->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned new node!"); + + DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG)); + DOUT << '\n'; + WorkListRemover DeadNodes(*this); + if (N->getNumValues() == RV.getNode()->getNumValues()) + DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); + else { + assert(N->getValueType(0) == RV.getValueType() && + N->getNumValues() == 1 && "Type mismatch"); + SDValue OpV = RV; + DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes); + } + + // Push the new node and any users onto the worklist + AddToWorkList(RV.getNode()); + AddUsersToWorkList(RV.getNode()); + + // Add any uses of the old node to the worklist in case this node is the + // last one that uses them. They may become dead after this node is + // deleted. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. 
+ removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + } + + // If the root changed (e.g. it was a dead load, update the root). + DAG.setRoot(Dummy.getValue()); +} + +SDValue DAGCombiner::visit(SDNode *N) { + switch(N->getOpcode()) { + default: break; + case ISD::TokenFactor: return visitTokenFactor(N); + case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); + case ISD::ADD: return visitADD(N); + case ISD::SUB: return visitSUB(N); + case ISD::ADDC: return visitADDC(N); + case ISD::ADDE: return visitADDE(N); + case ISD::MUL: return visitMUL(N); + case ISD::SDIV: return visitSDIV(N); + case ISD::UDIV: return visitUDIV(N); + case ISD::SREM: return visitSREM(N); + case ISD::UREM: return visitUREM(N); + case ISD::MULHU: return visitMULHU(N); + case ISD::MULHS: return visitMULHS(N); + case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); + case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SDIVREM: return visitSDIVREM(N); + case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::AND: return visitAND(N); + case ISD::OR: return visitOR(N); + case ISD::XOR: return visitXOR(N); + case ISD::SHL: return visitSHL(N); + case ISD::SRA: return visitSRA(N); + case ISD::SRL: return visitSRL(N); + case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTPOP: return visitCTPOP(N); + case ISD::SELECT: return visitSELECT(N); + case ISD::SELECT_CC: return visitSELECT_CC(N); + case ISD::SETCC: return visitSETCC(N); + case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); + case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); + case ISD::ANY_EXTEND: return visitANY_EXTEND(N); + case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::TRUNCATE: return visitTRUNCATE(N); + case ISD::BIT_CONVERT: return visitBIT_CONVERT(N); + case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); + case ISD::FADD: return visitFADD(N); + case ISD::FSUB: return visitFSUB(N); + case ISD::FMUL: return visitFMUL(N); + case ISD::FDIV: return visitFDIV(N); + case ISD::FREM: return visitFREM(N); + case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); + case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); + case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); + case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); + case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + case ISD::FP_ROUND: return visitFP_ROUND(N); + case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); + case ISD::FP_EXTEND: return visitFP_EXTEND(N); + case ISD::FNEG: return visitFNEG(N); + case ISD::FABS: return visitFABS(N); + case ISD::BRCOND: return visitBRCOND(N); + case ISD::BR_CC: return visitBR_CC(N); + case ISD::LOAD: return visitLOAD(N); + case ISD::STORE: return visitSTORE(N); + case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); + case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); + case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + } + return SDValue(); +} + +SDValue DAGCombiner::combine(SDNode *N) { + SDValue RV = visit(N); + + // If nothing happened, try a target-specific DAG combine. + if (RV.getNode() == 0) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned NULL!"); + + if (N->getOpcode() >= ISD::BUILTIN_OP_END || + TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { + + // Expose the DAG combiner to the target combiner impls. 
+ TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, Level == Unrestricted, false, this); + + RV = TLI.PerformDAGCombine(N, DagCombineInfo); + } + } + + // If N is a commutative binary node, try commuting it to enable more + // sdisel CSE. + if (RV.getNode() == 0 && + SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + N->getNumValues() == 1) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Constant operands are canonicalized to RHS. + if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { + SDValue Ops[] = { N1, N0 }; + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), + Ops, 2); + if (CSENode) + return SDValue(CSENode, 0); + } + } + + return RV; +} + +/// getInputChainForNode - Given a node, return its input chain if it has one, +/// otherwise return a null sd operand. +static SDValue getInputChainForNode(SDNode *N) { + if (unsigned NumOps = N->getNumOperands()) { + if (N->getOperand(0).getValueType() == MVT::Other) + return N->getOperand(0); + else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + return N->getOperand(NumOps-1); + for (unsigned i = 1; i < NumOps-1; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + return N->getOperand(i); + } + return SDValue(); +} + +SDValue DAGCombiner::visitTokenFactor(SDNode *N) { + // If N has two operands, where one has an input chain equal to the other, + // the 'other' chain is redundant. + if (N->getNumOperands() == 2) { + if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) + return N->getOperand(0); + if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) + return N->getOperand(1); + } + + SmallVector<SDNode *, 8> TFs; // List of token factors to visit. + SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. + SmallPtrSet<SDNode*, 16> SeenOps; + bool Changed = false; // If we should replace this token factor. + + // Start out with this token factor. + TFs.push_back(N); + + // Iterate through token factors. The TFs grows when new token factors are + // encountered. + for (unsigned i = 0; i < TFs.size(); ++i) { + SDNode *TF = TFs[i]; + + // Check each of the operands. + for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) { + SDValue Op = TF->getOperand(i); + + switch (Op.getOpcode()) { + case ISD::EntryToken: + // Entry tokens don't need to be added to the list. They are + // rededundant. + Changed = true; + break; + + case ISD::TokenFactor: + if ((CombinerAA || Op.hasOneUse()) && + std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { + // Queue up for processing. + TFs.push_back(Op.getNode()); + // Clean up in case the token factor is removed. + AddToWorkList(Op.getNode()); + Changed = true; + break; + } + // Fall thru + + default: + // Only add if it isn't already in the list. + if (SeenOps.insert(Op.getNode())) + Ops.push_back(Op); + else + Changed = true; + break; + } + } + } + + SDValue Result; + + // If we've change things around then replace token factor. + if (Changed) { + if (Ops.empty()) { + // The entry token is the only possible outcome. + Result = DAG.getEntryNode(); + } else { + // New and improved token factor. + Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, &Ops[0], Ops.size()); + } + + // Don't add users to work list. + return CombineTo(N, Result, false); + } + + return Result; +} + +/// MERGE_VALUES can always be eliminated. 
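/// (Each result i of a MERGE_VALUES node is by definition just operand i, so
/// every use can be redirected to the corresponding operand and the node
/// itself becomes dead.)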
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { + WorkListRemover DeadNodes(*this); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), + &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +static +SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, + SelectionDAG &DAG) { + MVT VT = N0.getValueType(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); + + if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && + isa<ConstantSDNode>(N00.getOperand(1))) { + // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) + N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT, + N00.getOperand(0), N01), + DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT, + N00.getOperand(1), N01)); + return DAG.getNode(ISD::ADD, DL, VT, N0, N1); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (add x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (add c1, c2) -> c1+c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0); + // fold (add x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && + GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), VT, + GA->getOffset() + + (uint64_t)N1C->getSExtValue()); + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N1C && N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(N1C->getAPIntValue()+ + N0C->getAPIntValue(), VT), + N0.getOperand(1)); + // reassociate add + SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); + if (RADD.getNode() != 0) + return RADD; + // fold ((0-A) + B) -> B-A + if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && + cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); + // fold (A + (0-B)) -> A-B + if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && + cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); + // fold (A+(B-A)) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) + return N1.getOperand(0); + // fold ((B-A)+A) -> B + if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) + return N0.getOperand(0); + // fold (A+(B-(A+C))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(0)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + 
N1.getOperand(1).getOperand(1)); + // fold (A+(B-(C+A))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(1)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + N1.getOperand(1).getOperand(0)); + // fold (A+((B-A)+or-C)) to (B+or-C) + if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && + N1.getOperand(0).getOpcode() == ISD::SUB && + N0 == N1.getOperand(0).getOperand(1)) + return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT, + N1.getOperand(0).getOperand(0), N1.getOperand(1)); + + // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + + if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10), + DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11)); + } + + if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (a+b) -> (a|b) iff a and b share no bits. + if (VT.isInteger() && !VT.isVector()) { + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. + if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); + } + } + + // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) + if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { + SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG); + if (Result.getNode()) return Result; + } + if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { + SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into an ADD. + if (N->hasNUsesOfValue(0, 1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + + // canonicalize constant to RHS. + if (N0C && !N1C) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + // fold (addc x, 0) -> x + no carry out + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + + // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR. + if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + N1, N0, CarryIn); + + // fold (adde x, y, false) -> (addc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sub x, x) -> 0 + if (N0 == N1) + return DAG.getConstant(0, N->getValueType(0)); + // fold (sub c1, c2) -> c1-c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + // fold (sub x, c) -> (add x, -c) + if (N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), VT)); + // fold (A+B)-A -> B + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) + return N0.getOperand(1); + // fold (A+B)-B -> A + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) + return N0.getOperand(0); + // fold ((A+(B+or-C))-B) -> A+or-C + if (N0.getOpcode() == ISD::ADD && + (N0.getOperand(1).getOpcode() == ISD::SUB || + N0.getOperand(1).getOpcode() == ISD::ADD) && + N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(1)); + // fold ((A+(C+B))-B) -> A+C + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::ADD && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + // fold ((A-(B-C))-C) -> A-B + if (N0.getOpcode() == ISD::SUB && + N0.getOperand(1).getOpcode() == ISD::SUB && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + + // If either operand of a sub is undef, the result is undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + // If the relocation model supports it, consider symbol offsets. 
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { + // fold (sub Sym, c) -> Sym-c + if (N1C && GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), VT, + GA->getOffset() - + (uint64_t)N1C->getSExtValue()); + // fold (sub Sym+c1, Sym+c2) -> c1-c2 + if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) + if (GA->getGlobal() == GB->getGlobal()) + return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), + VT); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (mul c1, c2) -> c1*c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); + // fold (mul x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mul x, -1) -> 0-x + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // fold (mul x, (1 << c)) -> x << c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy())); + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c + if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { + unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + // FIXME: If the input is something that is easily negated (e.g. a + // single-use add), we should put the negate there. + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(Log2Val, getShiftAmountTy()))); + } + // (mul (shl X, c1), c2) -> (mul X, c2 << c1) + if (N1C && N0.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N0.getOperand(1))) { + SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N1, N0.getOperand(1)); + AddToWorkList(C3.getNode()); + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + N0.getOperand(0), C3); + } + + // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one + // use. + { + SDValue Sh(0,0), Y(0,0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getNode()->hasOneUse()) { + Sh = N0; Y = N1; + } else if (N1.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N1.getOperand(1)) && + N1.getNode()->hasOneUse()) { + Sh = N1; Y = N0; + } + + if (Sh.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + Mul, Sh.getOperand(1)); + } + } + + // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) + if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, + N0.getOperand(1), N1)); + + // reassociate mul + SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); + if (RMUL.getNode() != 0) + return RMUL; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sdiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + // fold (sdiv X, 1) -> X + if (N1C && N1C->getSExtValue() == 1LL) + return N0; + // fold (sdiv X, -1) -> 0-X + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // If we know the sign bits of both operands are zero, strength reduce to a + // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), + N0, N1); + } + // fold (sdiv X, pow2) -> simple ops after legalize + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && + (isPowerOf2_64(N1C->getSExtValue()) || + isPowerOf2_64(-N1C->getSExtValue()))) { + // If dividing by powers of two is cheap, then don't perform the following + // fold. + if (TLI.isPow2DivCheap()) + return SDValue(); + + int64_t pow2 = N1C->getSExtValue(); + int64_t abs2 = pow2 > 0 ? pow2 : -pow2; + unsigned lg2 = Log2_64(abs2); + + // Splat the sign bit into the register + SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + DAG.getConstant(VT.getSizeInBits()-1, + getShiftAmountTy())); + AddToWorkList(SGN.getNode()); + + // Add (N0 < 0) ? abs2 - 1 : 0; + SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, + DAG.getConstant(VT.getSizeInBits() - lg2, + getShiftAmountTy())); + SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); + AddToWorkList(SRL.getNode()); + AddToWorkList(ADD.getNode()); // Divide by pow2 + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, + DAG.getConstant(lg2, getShiftAmountTy())); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (pow2 > 0) + return SRA; + + AddToWorkList(SRA.getNode()); + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), SRA); + } + + // if integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. 
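  // (Worked instance of the power-of-two path above, for a 32-bit sdiv X, 8:
  //     SGN = sra X, 31     ; 0 for X >= 0, -1 for X < 0
  //     SRL = srl SGN, 29   ; 0 or 7, i.e. abs2 - 1
  //     ADD = add X, SRL    ; bias negative dividends so that the final
  //     RES = sra ADD, 3    ;   arithmetic shift rounds toward zero
  //  and for a divisor of -8 the result is then negated as 0 - RES.)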
+ if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) && + !TLI.isIntDivCheap()) { + SDValue Op = BuildSDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (udiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + // fold (udiv x, (1 << c)) -> x >>u c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy())); + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + MVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, + N1.getOperand(1), + DAG.getConstant(SHC->getAPIntValue() + .logBase2(), + ADDVT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); + } + } + } + // fold (udiv x, c) -> alternate + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + SDValue Op = BuildUDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitSREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (srem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
+ if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (urem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue()-1,VT)); + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + VT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); + } + } + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
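The remainder folds rest on two identities: an unsigned remainder by a power of two is just a mask, and X % C can always be rewritten as X - (X / C) * C once the division itself has been simplified. A quick check, assuming 32-bit unsigned values:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xdeadbeefu;
  // (urem x, pow2) -> (and x, pow2-1)
  assert(x % 16u == (x & 15u));
  // X % C == X - (X / C) * C, the identity behind the rem-via-div rewrite.
  for (uint32_t c : {3u, 7u, 25u, 1000u})
    assert(x % c == x - (x / c) * c);
  return 0;
}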
+ if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHS(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (mulhs x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhs x, 1) -> (sra x, size(x)-1) + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, + DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + getShiftAmountTy())); + // fold (mulhs x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHU(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (mulhu x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhu x, 1) -> 0 + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getConstant(0, N0.getValueType()); + // fold (mulhu x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + return SDValue(); +} + +/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that +/// compute two values. LoOp and HiOp give the opcodes for the two computations +/// that are being performed. Return true if a simplification was made. +/// +SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp) { + // If the high half is not needed, just compute the low half. + bool HiExists = N->hasAnyUseOfValue(1); + if (!HiExists && + (!LegalOperations || + TLI.isOperationLegal(LoOp, N->getValueType(0)))) { + SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + return CombineTo(N, Res, Res); + } + + // If the low half is not needed, just compute the high half. + bool LoExists = N->hasAnyUseOfValue(0); + if (!LoExists && + (!LegalOperations || + TLI.isOperationLegal(HiOp, N->getValueType(1)))) { + SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + return CombineTo(N, Res, Res); + } + + // If both halves are used, return as it is. + if (LoExists && HiExists) + return SDValue(); + + // If the two computed results can be simplified separately, separate them. 
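MULHS and MULHU yield only the high half of a widening multiply, which is why multiplying by one folds to the sign splat (signed) or to zero (unsigned) above. A scalar model, assuming 32-bit operands widened to 64 bits and an arithmetic right shift on signed values (the helpers are illustrative):

#include <cassert>
#include <cstdint>

int32_t  mulhs(int32_t a, int32_t b)   { return int32_t((int64_t(a) * b) >> 32); }
uint32_t mulhu(uint32_t a, uint32_t b) { return uint32_t((uint64_t(a) * b) >> 32); }

int main() {
  for (int32_t x : {0, 1, -1, 123456, -123456}) {
    assert(mulhs(x, 1) == (x >> 31));      // fold (mulhs x, 1) -> (sra x, 31)
    assert(mulhu(uint32_t(x), 1u) == 0u);  // fold (mulhu x, 1) -> 0
  }
  return 0;
}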
+ if (LoExists) { + SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Lo.getNode()); + SDValue LoOpt = combine(Lo.getNode()); + if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && + (!LegalOperations || + TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) + return CombineTo(N, LoOpt, LoOpt); + } + + if (HiExists) { + SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Hi.getNode()); + SDValue HiOpt = combine(Hi.getNode()); + if (HiOpt.getNode() && HiOpt != Hi && + (!LegalOperations || + TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) + return CombineTo(N, HiOpt, HiOpt); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with +/// two operands of the same opcode, try to simplify it. +SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + MVT VT = N0.getValueType(); + assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + + // For each of OP in AND/OR/XOR: + // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) + // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) + // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| + N0.getOpcode() == ISD::SIGN_EXTEND || + (N0.getOpcode() == ISD::TRUNCATE && + !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); + } + + // For each of OP in SHL/SRL/SRA/AND... 
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) + // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) + // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || + N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && + N0.getOperand(1) == N1.getOperand(1)) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ORNode, N0.getOperand(1)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitAND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N1.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (and c1, c2) -> c1&c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); + // fold (and x, -1) -> x + if (N1C && N1C->isAllOnesValue()) + return N0; + // if (and x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(BitWidth))) + return DAG.getConstant(0, VT); + // reassociate and + SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); + if (RAND.getNode() != 0) + return RAND; + // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF + if (N1C && N0.getOpcode() == ISD::OR) + if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + return N1; + // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N0Op0 = N0.getOperand(0); + APInt Mask = ~N1C->getAPIntValue(); + Mask.trunc(N0Op0.getValueSizeInBits()); + if (DAG.MaskedValueIsZero(N0Op0, Mask)) { + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), + N0.getValueType(), N0Op0); + + // Replace uses of the AND with uses of the Zero extend node. + CombineTo(N, Zext); + + // We actually want to replace all uses of the any_extend with the + // zero_extend, to avoid duplicating things. This will later cause this + // AND to be folded. + CombineTo(N0.getNode(), Zext); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (and (load x), 255) -> (zextload x, i8) + // fold (and (extload x, i16), 255) -> (zextload x, i8) + if (N1C && N0.getOpcode() == ISD::LOAD) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (LN0->getExtensionType() != ISD::SEXTLOAD && + LN0->isUnindexed() && N0.hasOneUse() && + // Do not change the width of a volatile load. + !LN0->isVolatile()) { + MVT EVT = MVT::Other; + uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); + if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) + EVT = MVT::getIntegerVT(ActiveBits); + + MVT LoadedVT = LN0->getMemoryVT(); + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + MVT PtrType = N0.getOperand(1).getValueType(); + + // For big endian targets, we need to add an offset to the pointer to + // load the correct bytes. For little endian systems, we merely need to + // read fewer bytes from the same pointer. + unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8; + unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8; + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + unsigned Alignment = LN0->getAlignment(); + SDValue NewPtr = LN0->getBasePtr(); + + if (TLI.isBigEndian()) { + NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr, DAG.getConstant(PtrOff, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } + + AddToWorkList(NewPtr.getNode()); + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), + NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), + EVT, LN0->isVolatile(), Alignment); + AddToWorkList(N); + CombineTo(N0.getNode(), Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
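The (and (load x), 255) narrowing above adjusts the pointer on big-endian targets because the least-significant byte of a wide value is stored last; PtrOff is the wide store size minus the narrow store size. A host-independent sketch of that offset arithmetic, assuming a 4-byte value narrowed to 1 byte:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Value = 0x11223344u;
  const uint8_t BE[4] = {0x11, 0x22, 0x33, 0x44};   // big-endian memory image
  const uint8_t LE[4] = {0x44, 0x33, 0x22, 0x11};   // little-endian memory image
  unsigned LVTStoreBytes = 4, EVTStoreBytes = 1;
  unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;  // 3
  // Big-endian: load the single byte at BasePtr + PtrOff.
  assert(BE[PtrOff] == (Value & 0xffu));
  // Little-endian: the same byte already sits at offset 0.
  assert(LE[0] == (Value & 0xffu));
  return 0;
}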
+ } + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N1.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (or x, undef) -> -1 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(~0ULL, VT); + // fold (or c1, c2) -> c1|c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0); + // fold (or x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (or x, -1) -> -1 + if (N1C && N1C->isAllOnesValue()) + return N1; + // fold (or x, c) -> c iff (x & ~c) == 0 + if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) + return N1; + // reassociate or + SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); + if (ROR.getNode() != 0) + return ROR; + // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) + if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // (or (and X, C1), (and Y, C2)) -> 
(and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N1.getOperand(1).getOpcode() == ISD::Constant && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + const APInt &RHSMask = + cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, + DAG.getConstant(LHSMask | RHSMask, VT)); + } + } + + // See if this is some rotate idiom. + if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) + return SDValue(Rot, 0); + + return SDValue(); +} + +/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. +static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { + if (Op.getOpcode() == ISD::AND) { + if (isa<ConstantSDNode>(Op.getOperand(1))) { + Mask = Op.getOperand(1); + Op = Op.getOperand(0); + } else { + return false; + } + } + + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { + Shift = Op; + return true; + } + + return false; +} + +// MatchRotate - Handle an 'or' of two operands. If this is one of the many +// idioms for rotate, and if the target supports rotation instructions, generate +// a rot[lr]. +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { + // Must be a legal type. Expanded 'n promoted things won't work with rotates. + MVT VT = LHS.getValueType(); + if (!TLI.isTypeLegal(VT)) return 0; + + // The target must have at least one rotate flavor. + bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); + bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + if (!HasROTL && !HasROTR) return 0; + + // Match "(X shl/srl V1) & V2" where V2 may not be present. + SDValue LHSShift; // The shift. + SDValue LHSMask; // AND value if any. + if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) + return 0; // Not part of a rotate. + + SDValue RHSShift; // The shift. + SDValue RHSMask; // AND value if any. + if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) + return 0; // Not part of a rotate. + + if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) + return 0; // Not shifting the same value. + + if (LHSShift.getOpcode() == RHSShift.getOpcode()) + return 0; // Shifts must disagree. + + // Canonicalize shl to left side in a shl/srl pair. 
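MatchRotate looks for the classic OR-of-opposed-shifts idiom; in the constant case handled below, the two shift amounts must sum to the operand width. A sketch of the pattern it converts into ROTL/ROTR, assuming 32-bit unsigned values and a constant 0 < c < 32 (the helper name is illustrative):

#include <cassert>
#include <cstdint>

// (or (shl x, c), (srl x, 32 - c)) is a rotate left by c.
uint32_t rotl_idiom(uint32_t x, unsigned c) {
  return (x << c) | (x >> (32 - c));
}

int main() {
  uint32_t x = 0x80000001u;
  assert(rotl_idiom(x, 1) == 0x00000003u);
  assert(rotl_idiom(x, 8) == 0x00000180u);
  return 0;
}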
+ if (RHSShift.getOpcode() == ISD::SHL) { + std::swap(LHS, RHS); + std::swap(LHSShift, RHSShift); + std::swap(LHSMask , RHSMask ); + } + + unsigned OpSizeInBits = VT.getSizeInBits(); + SDValue LHSShiftArg = LHSShift.getOperand(0); + SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftAmt = RHSShift.getOperand(1); + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + if (LHSShiftAmt.getOpcode() == ISD::Constant && + RHSShiftAmt.getOpcode() == ISD::Constant) { + uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != OpSizeInBits) + return 0; + + SDValue Rot; + if (HasROTL) + Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt); + else + Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt); + + // If there is an AND of either shifted operand, apply it to the result. + if (LHSMask.getNode() || RHSMask.getNode()) { + APInt Mask = APInt::getAllOnesValue(OpSizeInBits); + + if (LHSMask.getNode()) { + APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); + Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; + } + if (RHSMask.getNode()) { + APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); + Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; + } + + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + } + + return Rot.getNode(); + } + + // If there is a mask here, and we have a variable shift, we can't be sure + // that we're masking out the right stuff. + if (LHSMask.getNode() || RHSMask.getNode()) + return 0; + + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) + if (RHSShiftAmt.getOpcode() == ISD::SUB && + LHSShiftAmt == RHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + if (HasROTL) + return DAG.getNode(ISD::ROTL, DL, VT, + LHSShiftArg, LHSShiftAmt).getNode(); + else + return DAG.getNode(ISD::ROTR, DL, VT, + LHSShiftArg, RHSShiftAmt).getNode(); + } + } + } + + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) + if (LHSShiftAmt.getOpcode() == ISD::SUB && + RHSShiftAmt == LHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + if (HasROTR) + return DAG.getNode(ISD::ROTR, DL, VT, + LHSShiftArg, RHSShiftAmt).getNode(); + else + return DAG.getNode(ISD::ROTL, DL, VT, + LHSShiftArg, LHSShiftAmt).getNode(); + } + } + } + + // Look for sign/zext/any-extended or truncate cases: + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND + || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND + || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND + || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND + || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND + || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND + || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { + SDValue LExtOp0 = LHSShiftAmt.getOperand(0); + SDValue RExtOp0 = RHSShiftAmt.getOperand(0); + if (RExtOp0.getOpcode() == ISD::SUB && + RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext 
(sub 32, y)))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } + } + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotr x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, + LHSShiftArg, + HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); + } + } + } + } + + return 0; +} + +SDValue DAGCombiner::visitXOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LHS, RHS, CC; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (xor undef, undef) -> 0. This is a common idiom (misuse). + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (xor x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (xor c1, c2) -> c1^c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold (xor x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // reassociate xor + SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1); + if (RXOR.getNode() != 0) + return RXOR; + + // fold !(x cc y) -> (x !cc y) + if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { + bool isInt = LHS.getValueType().isInteger(); + ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + isInt); + + if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { + switch (N0.getOpcode()) { + default: + assert(0 && "Unhandled SetCC Equivalent!"); + abort(); + case ISD::SETCC: + return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); + case ISD::SELECT_CC: + return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2), + N0.getOperand(3), NotCC); + } + } + } + + // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) + if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getNode()->hasOneUse() && + isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ + SDValue V = N0.getOperand(0); + V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, + DAG.getConstant(1, V.getValueType())); + AddToWorkList(V.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); + } + + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc + if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + } + } + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants + if (N1C && N1C->isAllOnesValue() && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + } + } + // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) + if (N1C && N0.getOpcode() == ISD::XOR) { + ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (N00C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1), + DAG.getConstant(N1C->getAPIntValue() ^ + N00C->getAPIntValue(), VT)); + if (N01C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(N1C->getAPIntValue() ^ + N01C->getAPIntValue(), VT)); + } + // fold (xor x, x) -> 0 + if (N0 == N1) { + if (!VT.isVector()) { + return DAG.getConstant(0, VT); + } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){ + // Produce a vector of zeros. + SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + std::vector<SDValue> Ops(VT.getVectorNumElements(), El); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + } + + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // Simplify the expression using non-local knowledge. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// visitShiftByConstant - Handle transforms common to the three shifts, when +/// the shift amount is a constant. +SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { + SDNode *LHS = N->getOperand(0).getNode(); + if (!LHS->hasOneUse()) return SDValue(); + + // We want to pull some binops through shifts, so that we have (and (shift)) + // instead of (shift (and)), likewise for add, or, xor, etc. This sort of + // thing happens with address calculations, so it's important to canonicalize + // it. + bool HighBitSet = false; // Can we transform this if the high bit is set? + + switch (LHS->getOpcode()) { + default: return SDValue(); + case ISD::OR: + case ISD::XOR: + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + case ISD::AND: + HighBitSet = true; // We can only transform sra if the high bit is set. + break; + case ISD::ADD: + if (N->getOpcode() != ISD::SHL) + return SDValue(); // only shl(add) not sr[al](add). + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + } + + // We require the RHS of the binop to be a constant as well. 
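visitShiftByConstant, begun above, hoists a binary operator out from under a shift so that address-style computations canonicalize as (binop (shift)) rather than (shift (binop)). The SHL cases rely on the shift distributing over the operator, e.g. assuming 32-bit unsigned arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 0x1234u, 0xffffffffu}) {
    uint32_t c = 8, s = 2;
    // (shl (add x, c), s) == (add (shl x, s), (shl c, s))
    assert(((x + c) << s) == ((x << s) + (c << s)));
    // The same distribution holds for OR/XOR/AND, subject to the sign-bit
    // restrictions noted above when the shift is an SRA.
    assert(((x | c) << s) == ((x << s) | (c << s)));
  }
  return 0;
}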
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); + + // FIXME: disable this unless the input to the binop is a shift by a constant. + // If it is not a shift, it pessimizes some common cases like: + // + // void foo(int *X, int i) { X[i & 1235] = 1; } + // int bar(int *X, int i) { return X[i & 255]; } + SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); + if ((BinOpLHSVal->getOpcode() != ISD::SHL && + BinOpLHSVal->getOpcode() != ISD::SRA && + BinOpLHSVal->getOpcode() != ISD::SRL) || + !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) + return SDValue(); + + MVT VT = N->getValueType(0); + + // If this is a signed shift right, and the high bit is modified by the + // logical operation, do not perform the transformation. The highBitSet + // boolean indicates the value of the high bit of the constant which would + // cause it to be modified for this operation. + if (N->getOpcode() == ISD::SRA) { + bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); + if (BinOpRHSSignSet != HighBitSet) + return SDValue(); + } + + // Fold the constants, shifting the binop RHS by the shift amount. + SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(), + N->getValueType(0), + LHS->getOperand(1), N->getOperand(1)); + + // Create the new shift. + SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(), + VT, LHS->getOperand(0), N->getOperand(1)); + + // Create the new binop. + return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS); +} + +SDValue DAGCombiner::visitSHL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getSizeInBits(); + + // fold (shl c1, c2) -> c1<<c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); + // fold (shl 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (shl x, c >= size(x)) -> undef + if (N1C && N1C->getZExtValue() >= OpSizeInBits) + return DAG.getUNDEF(VT); + // fold (shl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (shl x, c) is known to be zero, return 0 + if (DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(VT.getSizeInBits()))) + return DAG.getConstant(0, VT); + // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
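visitSHL also composes back-to-back shifts: two left shifts add their amounts, and a right shift followed by a left shift of the same amount only clears low bits. The identities involved, assuming 32-bit unsigned values and in-range shift amounts:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xdeadbeefu;
  // (shl (shl x, c1), c2) -> (shl x, c1 + c2) while c1 + c2 stays in range
  assert(((x << 3) << 4) == (x << 7));
  // (shl (srl x, c1), c1) only clears the low c1 bits, i.e. (and x, ~0u << c1).
  assert(((x >> 5) << 5) == (x & (~0u << 5)));
  return 0;
}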
+ if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND && + N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { + SDValue N101 = N1.getOperand(0).getOperand(1); + if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { + MVT TruncVT = N1.getValueType(); + SDValue N100 = N1.getOperand(0).getOperand(0); + APInt TruncC = N101C->getAPIntValue(); + TruncC.trunc(TruncVT.getSizeInBits()); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, + DAG.getNode(ISD::TRUNCATE, + N->getDebugLoc(), + TruncVT, N100), + DAG.getConstant(TruncC, TruncVT))); + } + } + + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SHL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 > OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or + // (srl (and x, (shl -1, c1)), (sub c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL << c1, VT)); + if (c2 > c1) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c2-c1, N1.getValueType())); + else + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c1-c2, N1.getValueType())); + } + // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) + if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL << N1C->getZExtValue(), VT)); + + return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); +} + +SDValue DAGCombiner::visitSRA(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold (sra c1, c2) -> (sra c1, c2) + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); + // fold (sra 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (sra -1, x) -> -1 + if (N0C && N0C->isAllOnesValue()) + return N0; + // fold (sra x, (setge c, size(x))) -> undef + if (N1C && N1C->getZExtValue() >= VT.getSizeInBits()) + return DAG.getUNDEF(VT); + // fold (sra x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports + // sext_inreg. 
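Shifting left and then arithmetically right by the same amount is how sign-extension from a narrower in-register type is usually spelled, which is what the SIGN_EXTEND_INREG fold below recognizes. A check for the i8-in-i32 case, assuming two's-complement narrowing and an arithmetic right shift on signed values:

#include <cassert>
#include <cstdint>

int main() {
  // (sra (shl x, 24), 24) sign-extends the low 8 bits of x: sext_inreg x, i8.
  for (int32_t x : {0x7f, 0x80, 0xff, 0x1234567f}) {
    int32_t InReg = int32_t(uint32_t(x) << 24) >> 24;
    assert(InReg == int32_t(int8_t(x & 0xff)));
  }
  return 0;
}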
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { + unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue(); + MVT EVT = MVT::getIntegerVT(LowBits); + if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + N0.getOperand(0), DAG.getValueType(EVT)); + } + + // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRA) { + if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); + if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1; + return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(Sum, N1C->getValueType(0))); + } + } + + // fold (sra (shl X, m), (sub result_size, n)) + // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for + // result_size - n != m. + // If truncate is free for the target sext(shl) is likely to result in better + // code. + if (N0.getOpcode() == ISD::SHL) { + // Get the two constanst of the shifts, CN0 = m, CN = n. + const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (N01C && N1C) { + // Determine what the truncate's result bitsize and type would be. + unsigned VTValSize = VT.getSizeInBits(); + MVT TruncVT = + MVT::getIntegerVT(VTValSize - N1C->getZExtValue()); + // Determine the residual right-shift amount. + signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); + + // If the shift is not a no-op (in which case this should be just a sign + // extend already), the truncated to type is legal, sign_extend is legal + // on that type, and the the truncate to that type is both legal and free, + // perform the transform. + if ((ShiftAmt > 0) && + TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && + TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && + TLI.isTruncateFree(VT, TruncVT)) { + + SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy()); + SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, + N0.getOperand(0), Amt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT, + Shift); + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), + N->getValueType(0), Trunc); + } + } + } + + // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND && + N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { + SDValue N101 = N1.getOperand(0).getOperand(1); + if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { + MVT TruncVT = N1.getValueType(); + SDValue N100 = N1.getOperand(0).getOperand(0); + APInt TruncC = N101C->getAPIntValue(); + TruncC.trunc(TruncVT.getSizeInBits()); + return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::AND, N->getDebugLoc(), + TruncVT, + DAG.getNode(ISD::TRUNCATE, + N->getDebugLoc(), + TruncVT, N100), + DAG.getConstant(TruncC, TruncVT))); + } + } + + // Simplify, based on bits shifted out of the LHS. + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + + // If the sign bit is known to be zero, switch this to a SRL. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); + + return N1C ? 
visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); +} + +SDValue DAGCombiner::visitSRL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getSizeInBits(); + + // fold (srl c1, c2) -> c1 >>u c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); + // fold (srl 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (srl x, c >= size(x)) -> undef + if (N1C && N1C->getZExtValue() >= OpSizeInBits) + return DAG.getUNDEF(VT); + // fold (srl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (srl x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(OpSizeInBits))) + return DAG.getConstant(0, VT); + + // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 > OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + // Shifting in all undef bits? + MVT SmallVT = N0.getOperand(0).getValueType(); + if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) + return DAG.getUNDEF(VT); + + SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + N0.getOperand(0), N1); + AddToWorkList(SmallShift.getNode()); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + } + + // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign + // bit, which is unmodified by sra. + if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { + if (N0.getOpcode() == ISD::SRA) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1); + } + + // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). + if (N1C && N0.getOpcode() == ISD::CTLZ && + N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + + // If any of the input bits are KnownOne, then the input couldn't be all + // zeros, thus the result of the srl will always be zero. + if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); + + // If all of the bits input the to ctlz node are known to be zero, then + // the result of the ctlz is "32" and the result of the shift is one. + APInt UnknownBits = ~KnownZero & Mask; + if (UnknownBits == 0) return DAG.getConstant(1, VT); + + // Otherwise, check to see if there is exactly one bit input to the ctlz. + if ((UnknownBits & (UnknownBits - 1)) == 0) { + // Okay, we know that only that the single bit specified by UnknownBits + // could be set on input to the CTLZ node. If this bit is set, the SRL + // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair + // to an SRL/XOR pair, which is likely to simplify more. 
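Concretely, for an i32 value ctlz returns 32 only for zero, so (srl (ctlz x), 5) computes x == 0; and when x is known to be either 0 or a single fixed bit (1 << k), that reduces to (xor (srl x, k), 1), which is the SRL/XOR rewrite performed below. A sketch, assuming 32-bit values and a clz32 helper defined here for illustration:

#include <cassert>
#include <cstdint>

// Portable count-leading-zeros with clz32(0) == 32, like ISD::CTLZ on i32.
unsigned clz32(uint32_t x) {
  unsigned n = 0;
  while (n < 32 && !(x & 0x80000000u)) { x <<= 1; ++n; }
  return n;
}

int main() {
  // (srl (ctlz x), 5) is 1 exactly when x == 0.
  assert((clz32(0) >> 5) == 1u);
  assert((clz32(1) >> 5) == 0u);
  // If x can only be 0 or (1 << k), the CTLZ/SRL pair is (xor (srl x, k), 1).
  for (unsigned k = 0; k < 32; ++k)
    for (uint32_t x : {0u, 1u << k})
      assert((clz32(x) >> 5) == ((x >> k) ^ 1u));
  return 0;
}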
+ unsigned ShAmt = UnknownBits.countTrailingZeros(); + SDValue Op = N0.getOperand(0); + + if (ShAmt) { + Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, + DAG.getConstant(ShAmt, getShiftAmountTy())); + AddToWorkList(Op.getNode()); + } + + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + Op, DAG.getConstant(1, VT)); + } + } + + // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND && + N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { + SDValue N101 = N1.getOperand(0).getOperand(1); + if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { + MVT TruncVT = N1.getValueType(); + SDValue N100 = N1.getOperand(0).getOperand(0); + APInt TruncC = N101C->getAPIntValue(); + TruncC.trunc(TruncVT.getSizeInBits()); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::AND, N->getDebugLoc(), + TruncVT, + DAG.getNode(ISD::TRUNCATE, + N->getDebugLoc(), + TruncVT, N100), + DAG.getConstant(TruncC, TruncVT))); + } + } + + // fold operands of srl based on knowledge that the low bits are not + // demanded. + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); +} + +SDValue DAGCombiner::visitCTLZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (ctlz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTPOP(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (ctpop c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + MVT VT = N->getValueType(0); + MVT VT0 = N0.getValueType(); + + // fold (select C, X, X) -> X + if (N1 == N2) + return N1; + // fold (select true, X, Y) -> X + if (N0C && !N0C->isNullValue()) + return N1; + // fold (select false, X, Y) -> Y + if (N0C && N0C->isNullValue()) + return N2; + // fold (select C, 1, X) -> (or C, X) + if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + // fold (select C, 0, 1) -> (xor C, 1) + if (VT.isInteger() && + (VT0 == MVT::i1 || + (VT0.isInteger() && + TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) && + N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { + SDValue XORNode; + if (VT == VT0) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + AddToWorkList(XORNode.getNode()); + if (VT.bitsGT(VT0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode); + } + // fold (select C, 0, X) -> (and (not C), X) + if (VT == VT0 && VT == MVT::i1 
&& N1C && N1C->isNullValue()) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + AddToWorkList(NOTNode.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2); + } + // fold (select C, X, 1) -> (or (not C), X) + if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + AddToWorkList(NOTNode.getNode()); + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1); + } + // fold (select C, X, 0) -> (and C, X) + if (VT == MVT::i1 && N2C && N2C->isNullValue()) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + // fold (select X, X, Y) -> (or X, Y) + // fold (select X, 1, Y) -> (or X, Y) + if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + // fold (select X, Y, X) -> (and X, Y) + // fold (select X, Y, 0) -> (and X, Y) + if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + + // If we can fold this based on the true/false value, do so. + if (SimplifySelectOps(N, N1, N2)) + return SDValue(N, 0); // Don't revisit N. + + // fold selects based on a setcc into other things, such as min/max/abs + if (N0.getOpcode() == ISD::SETCC) { + // FIXME: + // Check against MVT::Other for SELECT_CC, which is a workaround for targets + // having to say they don't support SELECT_CC on every type the DAG knows + // about, since there is no way to mark an opcode illegal at all value types + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1), + N1, N2, N0.getOperand(2)); + return SimplifySelect(N->getDebugLoc(), N0, N1, N2); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + SDValue N4 = N->getOperand(4); + ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); + + // fold select_cc lhs, rhs, x, x, cc -> x + if (N2 == N3) + return N2; + + // Determine if the condition we're dealing with is constant + SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC, N->getDebugLoc(), false); + if (SCC.getNode()) AddToWorkList(SCC.getNode()); + + if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { + if (!SCCC->isNullValue()) + return N2; // cond always true -> true val + else + return N3; // cond always false -> false val + } + + // Fold to a simpler select_cc + if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2)); + + // If we can fold this based on the true/false value, do so. + if (SimplifySelectOps(N, N2, N3)) + return SDValue(N, 0); // Don't revisit N. + + // fold select_cc into other things, such as min/max/abs + return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC); +} + +SDValue DAGCombiner::visitSETCC(SDNode *N) { + return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), + cast<CondCodeSDNode>(N->getOperand(2))->get(), + N->getDebugLoc()); +} + +// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: +// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" +// transformation. 
Returns true if extension are possible and the above +// mentioned transformation is profitable. +static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, + unsigned ExtOpc, + SmallVector<SDNode*, 4> &ExtendNodes, + const TargetLowering &TLI) { + bool HasCopyToRegUses = false; + bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); + for (SDNode::use_iterator UI = N0.getNode()->use_begin(), + UE = N0.getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User == N) + continue; + if (UI.getUse().getResNo() != N0.getResNo()) + continue; + // FIXME: Only extend SETCC N, N and SETCC N, c for now. + if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { + ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); + if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) + // Sign bits will be lost after a zext. + return false; + bool Add = false; + for (unsigned i = 0; i != 2; ++i) { + SDValue UseOp = User->getOperand(i); + if (UseOp == N0) + continue; + if (!isa<ConstantSDNode>(UseOp)) + return false; + Add = true; + } + if (Add) + ExtendNodes.push_back(User); + continue; + } + // If truncates aren't free and there are users we can't + // extend, it isn't worthwhile. + if (!isTruncFree) + return false; + // Remember if this value is live-out. + if (User->getOpcode() == ISD::CopyToReg) + HasCopyToRegUses = true; + } + + if (HasCopyToRegUses) { + bool BothLiveOut = false; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { + BothLiveOut = true; + break; + } + } + if (BothLiveOut) + // Both unextended and extended values are live out. There had better be + // good a reason for the transformation. + return ExtendNodes.size(); + } + return true; +} + +SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (sext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); + + // fold (sext (sext x)) -> (sext x) + // fold (sext (aext x)) -> (sext x) + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)); + + if (N0.getOpcode() == ISD::TRUNCATE) { + // fold (sext (truncate (load x))) -> (sext (smaller load x)) + // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + if (NarrowLoad.getNode() != N0.getNode()) + CombineTo(N0.getNode(), NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + // See if the value being truncated is already sign extended. If so, just + // eliminate the trunc/sext pair. + SDValue Op = N0.getOperand(0); + unsigned OpBits = Op.getValueType().getSizeInBits(); + unsigned MidBits = N0.getValueType().getSizeInBits(); + unsigned DestBits = VT.getSizeInBits(); + unsigned NumSignBits = DAG.ComputeNumSignBits(Op); + + if (OpBits == DestBits) { + // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign + // bits, it is already ready. + if (NumSignBits > DestBits-MidBits) + return Op; + } else if (OpBits < DestBits) { + // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign + // bits, just sext from i32. 
+ if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op); + } else { + // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign + // bits, just truncate to i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + } + + // fold (sext (truncate x)) -> (sextinreg x). + if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, + N0.getValueType())) { + if (Op.getValueType().bitsLT(VT)) + Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); + else if (Op.getValueType().bitsGT(VT)) + Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, + DAG.getValueType(N0.getValueType())); + } + } + + // fold (sext (load x)) -> (sext (truncate (sextload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) + // fold (sext ( extload x)) -> (sext (truncate (sextload x))) + if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + if (N0.getOpcode() == ISD::SETCC) { + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + } + + // fold (sext x) -> (zext x) if the sign bit is known zero. 
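That final fold relies on sign- and zero-extension agreeing whenever the source's sign bit is zero; when the bit can be set they differ, so the combiner must prove it is zero first. A quick check for i8 to i32, assuming two's-complement integers:

#include <cassert>
#include <cstdint>

int main() {
  // Sign bit clear: sext and zext produce the same value.
  for (int v = 0; v <= 127; ++v)
    assert(int32_t(int8_t(v)) == int32_t(uint8_t(v)));
  // Sign bit set: they differ (-128 vs. 128), so the fold would be wrong.
  assert(int32_t(int8_t(-128)) != int32_t(uint8_t(0x80)));
  return 0;
}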
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && + DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (zext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + // fold (zext (zext x)) -> (zext x) + // fold (zext (aext x)) -> (zext x) + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)); + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + if (NarrowLoad.getNode() != N0.getNode()) + CombineTo(N0.getNode(), NarrowLoad); + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + } + } + + // fold (zext (truncate x)) -> (and x, mask) + if (N0.getOpcode() == ISD::TRUNCATE && + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + SDValue Op = N0.getOperand(0); + if (Op.getValueType().bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); + } else if (Op.getValueType().bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + } + return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType()); + } + + // Fold (zext (and (trunc x), cst)) -> (and x, cst), + // if either of the casts is not free. + if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + X, DAG.getConstant(Mask, VT)); + } + + // fold (zext (load x)) -> (zext (truncate (zextload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. 
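+      // The SetCC nodes collected by ExtendUsesToFormExtLoad still compare
+      // the narrow value.  Rebuild each at the wide type VT: the load operand
+      // becomes the zextload itself and the other operand (a constant, per
+      // ExtendUsesToFormExtLoad) is zero-extended to match; signed compares
+      // were already rejected when the list was built, so each comparison's
+      // result is unchanged.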
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (zext (zextload x)) -> (zext (truncate (zextload x))) + // fold (zext ( extload x)) -> (zext (truncate (zextload x))) + if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), + ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + if (N0.getOpcode() == ISD::SETCC) { + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + if (N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); + + // fold (aext (truncate (load x))) -> (aext (smaller load x)) + // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + if (NarrowLoad.getNode() != N0.getNode()) + CombineTo(N0.getNode(), NarrowLoad); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + } + } + + // fold (aext (truncate x)) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue TruncOp = N0.getOperand(0); + if (TruncOp.getValueType() == VT) + return TruncOp; // x iff x size == zext size. + if (TruncOp.getValueType().bitsGT(VT)) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); + } + + // Fold (aext (and (trunc x), cst)) -> (and x, cst) + // if the trunc is not free. 
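+  // E.g. with %x : i64, (aext i32->i64 (and (trunc %x to i32), 255)) becomes
+  // (and %x, 255): the low bits agree, the high bits of an any_extend are
+  // unspecified anyway, and when the truncate is not free this drops two
+  // nodes from the cross-width chain (the mask constant is zext'd to i64).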
+ if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType())) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + X, DAG.getConstant(Mask, VT)); + } + + // fold (aext (load x)) -> (aext (truncate (extload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (aext (zextload x)) -> (aext (truncate (zextload x))) + // fold (aext (sextload x)) -> (aext (truncate (sextload x))) + // fold (aext ( extload x)) -> (aext (truncate (extload x))) + if (N0.getOpcode() == ISD::LOAD && + !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + if (N0.getOpcode() == ISD::SETCC) { + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) + return SCC; + } + + return SDValue(); +} + +/// GetDemandedBits - See if the specified operand can be simplified with the +/// knowledge that only the bits specified by Mask are used. If so, return the +/// simpler operand, otherwise return a null SDValue. 
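+// For instance, with V = (or %a, (shl %b, 8)) and Mask = 0xff the shifted
+// operand contributes no demanded bits, so this returns %a; visitTRUNCATE
+// uses exactly that to turn (trunc (or (shl x, 8), y)) into (trunc y).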
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { + switch (V.getOpcode()) { + default: break; + case ISD::OR: + case ISD::XOR: + // If the LHS or RHS don't contribute bits to the or, drop them. + if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) + return V.getOperand(1); + if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) + return V.getOperand(0); + break; + case ISD::SRL: + // Only look at single-use SRLs. + if (!V.getNode()->hasOneUse()) + break; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + // See if we can recursively simplify the LHS. + unsigned Amt = RHSC->getZExtValue(); + + // Watch out for shift count overflow though. + if (Amt >= Mask.getBitWidth()) break; + APInt NewMask = Mask << Amt; + SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); + if (SimplifyLHS.getNode()) + return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(), + SimplifyLHS, V.getOperand(1)); + } + } + return SDValue(); +} + +/// ReduceLoadWidth - If the result of a wider load is shifted to right of N +/// bits and then truncated to a narrower type and where N is a multiple +/// of number of bits of the narrower type, transform it to a narrower load +/// from address + N / num of bits of new type. If the result is to be +/// extended, also fold the extension to form a extending load. +SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { + unsigned Opc = N->getOpcode(); + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + MVT EVT = VT; + + // This transformation isn't valid for vector loads. + if (VT.isVector()) + return SDValue(); + + // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then + // extended to VT. + if (Opc == ISD::SIGN_EXTEND_INREG) { + ExtType = ISD::SEXTLOAD; + EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) + return SDValue(); + } + + unsigned EVTBits = EVT.getSizeInBits(); + unsigned ShAmt = 0; + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + ShAmt = N01->getZExtValue(); + // Is the shift amount a multiple of size of VT? + if ((ShAmt & (EVTBits-1)) == 0) { + N0 = N0.getOperand(0); + if (N0.getValueType().getSizeInBits() <= EVTBits) + return SDValue(); + } + } + } + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() && + cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits && + // Do not change the width of a volatile load. + !cast<LoadSDNode>(N0)->isVolatile()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT PtrType = N0.getOperand(1).getValueType(); + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (TLI.isBigEndian()) { + unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); + unsigned EVTStoreBits = EVT.getStoreSizeInBits(); + ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; + } + + uint64_t PtrOff = ShAmt / 8; + unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), + PtrType, LN0->getBasePtr(), + DAG.getConstant(PtrOff, PtrType)); + AddToWorkList(NewPtr.getNode()); + + SDValue Load = (ExtType == ISD::NON_EXTLOAD) + ? 
DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, + LN0->isVolatile(), NewAlign) + : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, + EVT, LN0->isVolatile(), NewAlign); + + // Replace the old load's chain with the new load's chain. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), + &DeadNodes); + + // Return the new loaded value. + return Load; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + MVT VT = N->getValueType(0); + MVT EVT = cast<VTSDNode>(N1)->getVT(); + unsigned VTBits = VT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + // fold (sext_in_reg c1) -> c1 + if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); + + // If the input is already sign extended, just drop the extension. + if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1) + return N0; + + // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + N0.getOperand(0), N1); + } + + // fold (sext_in_reg (sext x)) -> (sext x) + // fold (sext_in_reg (aext x)) -> (sext x) + // if x is small enough. + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getValueType().getSizeInBits() < EVTBits) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); + } + + // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. + if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) + return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT); + + // fold operands of sext_in_reg based on knowledge that the top bits are not + // demanded. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (sext_in_reg (load x)) -> (smaller sextload x) + // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) + SDValue NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.getNode()) + return NarrowLoad; + + // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) + // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. + // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. + if (N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) { + // We can turn this into an SRA iff the input to the SRL is already sign + // extended enough. 
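+        // Worked example: (sext_in_reg (srl i32 X, 23), i8).  Here
+        // 32 - (23 + 8) = 1, so the rewrite to (sra X, 23) fires once X has
+        // at least two sign bits (bit 31 == bit 30); the arithmetic shift
+        // then fills the upper bits with the same value the sext_in_reg
+        // would have replicated from bit 7 of the shifted result.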
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); + if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits) + return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1)); + } + } + + // fold (sext_inreg (extload x)) -> (sextload x) + if (ISD::isEXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use + if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse() && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + return SDValue(); +} + +SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // noop truncate + if (N0.getValueType() == N->getValueType(0)) + return N0; + // fold (truncate c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0); + // fold (truncate (truncate x)) -> (truncate x) + if (N0.getOpcode() == ISD::TRUNCATE) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + // fold (truncate (ext x)) -> (ext x) or (truncate x) or x + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND|| + N0.getOpcode() == ISD::ANY_EXTEND) { + if (N0.getOperand(0).getValueType().bitsLT(VT)) + // if the source is smaller than the dest, we still need an extend + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0)); + else if (N0.getOperand(0).getValueType().bitsGT(VT)) + // if the source is larger than the dest, than we just need the truncate + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + else + // if the source and dest are the same type, we can drop both the extend + // and the truncate + return N0.getOperand(0); + } + + // See if we can simplify the input to this truncate through knowledge that + // only the low bits are being used. 
For example "trunc (or (shl x, 8), y)" + // -> trunc y + SDValue Shorter = + GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), + VT.getSizeInBits())); + if (Shorter.getNode()) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); + + // fold (truncate (load x)) -> (smaller load x) + // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) + return ReduceLoadWidth(N); +} + +static SDNode *getBuildPairElt(SDNode *N, unsigned i) { + SDValue Elt = N->getOperand(i); + if (Elt.getOpcode() != ISD::MERGE_VALUES) + return Elt.getNode(); + return Elt.getOperand(Elt.getResNo()).getNode(); +} + +/// CombineConsecutiveLoads - build_pair (load, load) -> load +/// if load locations are consecutive. +SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { + assert(N->getOpcode() == ISD::BUILD_PAIR); + + SDNode *LD1 = getBuildPairElt(N, 0); + if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + return SDValue(); + MVT LD1VT = LD1->getValueType(0); + SDNode *LD2 = getBuildPairElt(N, 1); + const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + + if (ISD::isNON_EXTLoad(LD2) && + LD2->hasOneUse() && + // If both are volatile this would reduce the number of volatile loads. + // If one is volatile it might be ok, but play conservative and bail out. + !cast<LoadSDNode>(LD1)->isVolatile() && + !cast<LoadSDNode>(LD2)->isVolatile() && + TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { + LoadSDNode *LD = cast<LoadSDNode>(LD1); + unsigned Align = LD->getAlignment(); + unsigned NewAlign = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForMVT()); + + if (NewAlign <= Align && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) + return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + false, Align); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // If the input is a BUILD_VECTOR with all constant elements, fold this now. + // Only do this before legalize, since afterward the target may be depending + // on the bitconvert. + // First check to see if this is all constant. + if (!LegalTypes && + N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && + VT.isVector()) { + bool isSimple = true; + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) + if (N0.getOperand(i).getOpcode() != ISD::UNDEF && + N0.getOperand(i).getOpcode() != ISD::Constant && + N0.getOperand(i).getOpcode() != ISD::ConstantFP) { + isSimple = false; + break; + } + + MVT DestEltVT = N->getValueType(0).getVectorElementType(); + assert(!DestEltVT.isVector() && + "Element type of vector ValueType must not be vector!"); + if (isSimple) + return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT); + } + + // If the input is a constant, let getNode fold it. + if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { + SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); + if (Res.getNode() != N) return Res; + } + + // (conv (conv x, t1), t2) -> (conv x, t2) + if (N0.getOpcode() == ISD::BIT_CONVERT) + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, + N0.getOperand(0)); + + // fold (conv (load x)) -> (load (conv*)x) + // If the resultant load doesn't need a higher alignment than the original! + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + // Do not change the width of a volatile load. 
+ !cast<LoadSDNode>(N0)->isVolatile() && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + unsigned Align = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForMVT()); + unsigned OrigAlign = LN0->getAlignment(); + + if (Align <= OrigAlign) { + SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), + LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), OrigAlign); + AddToWorkList(N); + CombineTo(N0.getNode(), + DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + N0.getValueType(), Load), + Load.getValue(1)); + return Load; + } + } + + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) + // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // This often reduces constant pool loads. + if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && + N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { + SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT, + N0.getOperand(0)); + AddToWorkList(NewConv.getNode()); + + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + if (N0.getOpcode() == ISD::FNEG) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + NewConv, DAG.getConstant(SignBit, VT)); + assert(N0.getOpcode() == ISD::FABS); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + NewConv, DAG.getConstant(~SignBit, VT)); + } + + // fold (bitconvert (fcopysign cst, x)) -> + // (or (and (bitconvert x), sign), (and cst, (not sign))) + // Note that we don't handle (copysign x, cst) because this can always be + // folded to an fneg or fabs. + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(0)) && + VT.isInteger() && !VT.isVector()) { + unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); + MVT IntXVT = MVT::getIntegerVT(OrigXWidth); + if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { + SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + IntXVT, N0.getOperand(1)); + AddToWorkList(X.getNode()); + + // If X has a different width than the result/lhs, sext it or truncate it. + unsigned VTWidth = VT.getSizeInBits(); + if (OrigXWidth < VTWidth) { + X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); + AddToWorkList(X.getNode()); + } else if (OrigXWidth > VTWidth) { + // To get the sign bit in the right place, we have to shift it right + // before truncating. + X = DAG.getNode(ISD::SRL, X.getDebugLoc(), + X.getValueType(), X, + DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); + AddToWorkList(X.getNode()); + X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + AddToWorkList(X.getNode()); + } + + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, + X, DAG.getConstant(SignBit, VT)); + AddToWorkList(X.getNode()); + + SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + VT, N0.getOperand(0)); + Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, + Cst, DAG.getConstant(~SignBit, VT)); + AddToWorkList(Cst.getNode()); + + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); + } + } + + // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
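+  // E.g. an f64 built as (bit_convert (build_pair (load i32 [p]),
+  // (load i32 [p+4]))) can, on a little-endian target where both halves are
+  // non-volatile and provably adjacent, be replaced by one f64 load of [p],
+  // provided the original alignment already satisfies the wider type's ABI
+  // alignment.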
+ if (N0.getOpcode() == ISD::BUILD_PAIR) { + SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); + if (CombineLD.getNode()) + return CombineLD; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { + MVT VT = N->getValueType(0); + return CombineConsecutiveLoads(N, VT); +} + +/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector +/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the +/// destination element value type. +SDValue DAGCombiner:: +ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { + MVT SrcEltVT = BV->getValueType(0).getVectorElementType(); + + // If this is already the right type, we're done. + if (SrcEltVT == DstEltVT) return SDValue(BV, 0); + + unsigned SrcBitSize = SrcEltVT.getSizeInBits(); + unsigned DstBitSize = DstEltVT.getSizeInBits(); + + // If this is a conversion of N elements of one type to N elements of another + // type, convert each element. This handles FP<->INT cases. + if (SrcBitSize == DstBitSize) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + SDValue Op = BV->getOperand(i); + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + if (Op.getValueType() != SrcEltVT) + Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); + Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + DstEltVT, Op)); + AddToWorkList(Ops.back().getNode()); + } + MVT VT = MVT::getVectorVT(DstEltVT, + BV->getValueType(0).getVectorNumElements()); + return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + + // Otherwise, we're growing or shrinking the elements. To avoid having to + // handle annoying details of growing/shrinking FP values, we convert them to + // int first. + if (SrcEltVT.isFloatingPoint()) { + // Convert the input float vector to a int vector where the elements are the + // same sizes. + assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); + MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits()); + BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); + SrcEltVT = IntVT; + } + + // Now we know the input is an integer vector. If the output is a FP type, + // convert to integer first, then to FP of the right size. + if (DstEltVT.isFloatingPoint()) { + assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); + MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits()); + SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); + + // Next, convert to FP elements of the same size. + return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT); + } + + // Okay, we know the src/dst types are both integers of differing types. + // Handling growing first. + assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); + if (SrcBitSize < DstBitSize) { + unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; + i += NumInputsPerOutput) { + bool isLE = TLI.isLittleEndian(); + APInt NewBits = APInt(DstBitSize, 0); + bool EltIsUndef = true; + for (unsigned j = 0; j != NumInputsPerOutput; ++j) { + // Shift the previously computed bits over. + NewBits <<= SrcBitSize; + SDValue Op = BV->getOperand(i+ (isLE ? 
(NumInputsPerOutput-j-1) : j)); + if (Op.getOpcode() == ISD::UNDEF) continue; + EltIsUndef = false; + + NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). + zextOrTrunc(SrcBitSize).zext(DstBitSize)); + } + + if (EltIsUndef) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + else + Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); + } + + MVT VT = MVT::getVectorVT(DstEltVT, Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + + // Finally, this must be the case where we are shrinking elements: each input + // turns into multiple outputs. + bool isS2V = ISD::isScalarToVector(BV); + unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; + MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands()); + SmallVector<SDValue, 8> Ops; + + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { + for (unsigned j = 0; j != NumOutputsPerInput; ++j) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + continue; + } + + APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))-> + getAPIntValue()).zextOrTrunc(SrcBitSize); + + for (unsigned j = 0; j != NumOutputsPerInput; ++j) { + APInt ThisVal = APInt(OpVal).trunc(DstBitSize); + Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); + if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal) + // Simply turn this into a SCALAR_TO_VECTOR of the new type. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + Ops[0]); + OpVal = OpVal.lshr(DstBitSize); + } + + // For big endian targets, swap the order of the pieces of each element. + if (TLI.isBigEndian()) + std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); + } + + return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + &Ops[0], Ops.size()); +} + +SDValue DAGCombiner::visitFADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fadd c1, c2) -> (fadd c1, c2) + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); + // canonicalize constant to RHS + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); + // fold (fadd A, 0) -> A + if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + return N0; + // fold (fadd A, (fneg B)) -> (fsub A, B) + if (isNegatibleForFree(N1, LegalOperations) == 2) + return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations)); + // fold (fadd (fneg A), B) -> (fsub B, A) + if (isNegatibleForFree(N0, LegalOperations) == 2) + return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, + GetNegatedExpression(N0, DAG, LegalOperations)); + + // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) + if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && + N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + N0.getOperand(1), N1)); + + return SDValue(); +} + +SDValue DAGCombiner::visitFSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + 
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fsub c1, c2) -> c1-c2 + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); + // fold (fsub A, 0) -> A + if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + return N0; + // fold (fsub 0, B) -> -B + if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations)) + return GetNegatedExpression(N1, DAG, LegalOperations); + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); + } + // fold (fsub A, (fneg B)) -> (fadd A, B) + if (isNegatibleForFree(N1, LegalOperations)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations)); + + return SDValue(); +} + +SDValue DAGCombiner::visitFMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fmul c1, c2) -> c1*c2 + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); + // canonicalize constant to RHS + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); + // fold (fmul A, 0) -> 0 + if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + return N1; + // fold (fmul X, 2.0) -> (fadd X, X) + if (N1CFP && N1CFP->isExactlyValue(+2.0)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); + // fold (fmul X, (fneg 1.0)) -> (fneg X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); + + // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. 
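+      // isNegatibleForFree returns a small cost code; 2 appears to mean the
+      // negated form is strictly cheaper (e.g. the operand is itself an FNEG,
+      // so negating merely strips a node).  Only in that case is it worth
+      // rewriting the multiply with both operands negated.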
+ if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations)); + } + } + + // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && + N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), N1)); + + return SDValue(); +} + +SDValue DAGCombiner::visitFDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fdiv c1, c2) -> c1/c2 + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + + + // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. + if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (frem c1, c2) -> fmod(c1,c2) + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); + + return SDValue(); +} + +SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); + + if (N1CFP) { + const APFloat& V = N1CFP->getValueAPF(); + // copysign(x, c1) -> fabs(x) iff ispos(c1) + // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) + if (!V.isNegative()) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + } else { + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0)); + } + } + + // copysign(fabs(x), y) -> copysign(x, y) + // copysign(fneg(x), y) -> copysign(x, y) + // copysign(copysign(x,z), y) -> copysign(x, y) + if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || + N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0.getOperand(0), N1); + + // copysign(x, abs(y)) -> abs(x) + if (N1.getOpcode() == ISD::FABS) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + + // copysign(x, copysign(y,z)) -> copysign(x, z) + if (N1.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0, 
N1.getOperand(1)); + + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0, N1.getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + MVT VT = N->getValueType(0); + MVT OpVT = N0.getValueType(); + + // fold (sint_to_fp c1) -> c1fp + if (N0C && OpVT != MVT::ppcf128) + return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + + // If the input is a legal type, and SINT_TO_FP is not legal on this target, + // but UINT_TO_FP is legal on this target, try to convert. + if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && + TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { + // If the sign bit is known to be zero, we can change this to UINT_TO_FP. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + MVT VT = N->getValueType(0); + MVT OpVT = N0.getValueType(); + + // fold (uint_to_fp c1) -> c1fp + if (N0C && OpVT != MVT::ppcf128) + return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + + // If the input is a legal type, and UINT_TO_FP is not legal on this target, + // but SINT_TO_FP is legal on this target, try to convert. + if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && + TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { + // If the sign bit is known to be zero, we can change this to SINT_TO_FP. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fp_to_sint c1fp) -> c1 + if (N0CFP) + return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fp_to_uint c1fp) -> c1 + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fp_round c1fp) -> c1fp + if (N0CFP && N0.getValueType() != MVT::ppcf128) + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); + + // fold (fp_round (fp_extend x)) -> x + if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) + return N0.getOperand(0); + + // fold (fp_round (fp_round x)) -> (fp_round x) + if (N0.getOpcode() == ISD::FP_ROUND) { + // This is a value preserving truncation if both round's are. 
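+    // The extra integer operand of FP_ROUND is a flag: by convention 1
+    // asserts the rounding loses no information.  The merged round may keep
+    // that assertion only if both of the original rounds carried it, which
+    // is what the && below computes.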
+ bool IsTrunc = N->getConstantOperandVal(1) == 1 && + N0.getNode()->getConstantOperandVal(1) == 1; + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getIntPtrConstant(IsTrunc)); + } + + // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { + SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT, + N0.getOperand(0), N1); + AddToWorkList(Tmp.getNode()); + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + Tmp, N0.getOperand(1)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + + // fold (fp_round_inreg c1fp) -> c1fp + if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) { + SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. + if (N->hasOneUse() && + N->use_begin()->getOpcode() == ISD::FP_ROUND) + return SDValue(); + + // fold (fp_extend c1fp) -> c1fp + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); + + // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the + // value of X. + if (N0.getOpcode() == ISD::FP_ROUND + && N0.getNode()->getConstantOperandVal(1) == 1) { + SDValue In = N0.getOperand(0); + if (In.getValueType() == VT) return In; + if (VT.bitsLT(In.getValueType())) + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, + In, N0.getOperand(1)); + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); + } + + // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), + N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFNEG(SDNode *N) { + SDValue N0 = N->getOperand(0); + + if (isNegatibleForFree(N0, LegalOperations)) + return GetNegatedExpression(N0, DAG, LegalOperations); + + // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading + // constant pool values. 
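+  // E.g. for an f64 whose bits sit in an i64, XORing with
+  // APInt::getSignBit(64) = 0x8000000000000000 flips only the IEEE sign bit,
+  // so the negation needs no -0.0 (or similar) constant-pool load at all.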
+ if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger() && + !N0.getOperand(0).getValueType().isVector()) { + SDValue Int = N0.getOperand(0); + MVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, + DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + AddToWorkList(Int.getNode()); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Int); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFABS(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fabs c1) -> fabs(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + // fold (fabs (fabs x)) -> (fabs x) + if (N0.getOpcode() == ISD::FABS) + return N->getOperand(0); + // fold (fabs (fneg x)) -> (fabs x) + // fold (fabs (fcopysign x, y)) -> (fabs x) + if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); + + // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading + // constant pool values. + if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger() && + !N0.getOperand(0).getValueType().isVector()) { + SDValue Int = N0.getOperand(0); + MVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, + DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + AddToWorkList(Int.getNode()); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Int); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBRCOND(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + + // never taken branch, fold to chain + if (N1C && N1C->isNullValue()) + return Chain; + // unconditional branch + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2); + // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal + // on the target. + if (N1.getOpcode() == ISD::SETCC && + TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) { + return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + Chain, N1.getOperand(2), + N1.getOperand(0), N1.getOperand(1), N2); + } + + if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) { + // Match this pattern so that we can generate simpler code: + // + // %a = ... + // %b = and i32 %a, 2 + // %c = srl i32 %b, 1 + // brcond i32 %c ... + // + // into + // + // %a = ... + // %b = and %a, 2 + // %c = setcc eq %b, 0 + // brcond %c ... + // + // This applies only when the AND constant value has one bit set and the + // SRL constant is equal to the log2 of the AND constant. The back-end is + // smart enough to convert the result into a TEST/JMP sequence. 
+ SDValue Op0 = N1.getOperand(0); + SDValue Op1 = N1.getOperand(1); + + if (Op0.getOpcode() == ISD::AND && + Op0.hasOneUse() && + Op1.getOpcode() == ISD::Constant) { + SDValue AndOp0 = Op0.getOperand(0); + SDValue AndOp1 = Op0.getOperand(1); + + if (AndOp1.getOpcode() == ISD::Constant) { + const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); + + if (AndConst.isPowerOf2() && + cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { + SDValue SetCC = + DAG.getSetCC(N->getDebugLoc(), + TLI.getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, Op0.getValueType()), + ISD::SETNE); + + // Replace the uses of SRL with SETCC + DAG.ReplaceAllUsesOfValueWith(N1, SetCC); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + } + } + } + } + + return SDValue(); +} + +// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. +// +SDValue DAGCombiner::visitBR_CC(SDNode *N) { + CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); + SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + + // Use SimplifySetCC to simplify SETCC's. + SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), N->getDebugLoc(), + false); + if (Simp.getNode()) AddToWorkList(Simp.getNode()); + + ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode()); + + // fold br_cc true, dest -> br dest (unconditional branch) + if (SCCC && !SCCC->isNullValue()) + return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, + N->getOperand(0), N->getOperand(4)); + // fold br_cc false, dest -> unconditional fall through + if (SCCC && SCCC->isNullValue()) + return N->getOperand(0); + + // fold to a simpler setcc + if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + N->getOperand(0), Simp.getOperand(2), + Simp.getOperand(0), Simp.getOperand(1), + N->getOperand(4)); + + return SDValue(); +} + +/// CombineToPreIndexedLoadStore - Try turning a load / store into a +/// pre-indexed load / store when the base pointer is an add or subtract +/// and it has other uses besides the load / store. After the +/// transformation, the new indexed load / store has effectively folded +/// the add / subtract in and all of its other uses are redirected to the +/// new load / store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (!LegalOperations) + return false; + + bool isLoad = true; + SDValue Ptr; + MVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail + // out. There is no reason to make this a preinc/predec. + if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || + Ptr.getNode()->hasOneUse()) + return false; + + // Ask the target to do addressing mode selection. 
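+  // getPreIndexedAddressParts splits the access into a base and an offset
+  // and reports whether the target can apply that offset to the base as part
+  // of the memory operation (PRE_INC / PRE_DEC).  Targets with auto-update
+  // addressing (for instance ARM's "ldr r0, [r1, #4]!") are the intended
+  // beneficiaries of this combine.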
+ SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) + return false; + // Don't create a indexed load / store with zero offset. + if (isa<ConstantSDNode>(Offset) && + cast<ConstantSDNode>(Offset)->isNullValue()) + return false; + + // Try turning it into a pre-indexed load / store except when: + // 1) The new base ptr is a frame index. + // 2) If N is a store and the new base ptr is either the same as or is a + // predecessor of the value being stored. + // 3) Another use of old base ptr is a predecessor of N. If ptr is folded + // that would create a cycle. + // 4) All uses are load / store ops that use it as old base ptr. + + // Check #1. Preinc'ing a frame index would require copying the stack pointer + // (plus the implicit offset) to a register to preinc anyway. + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + return false; + + // Check #2. + if (!isLoad) { + SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode())) + return false; + } + + // Now check for #3 and #4. + bool RealUse = false; + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), + E = Ptr.getNode()->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == N) + continue; + if (Use->isPredecessorOf(N)) + return false; + + if (!((Use->getOpcode() == ISD::LOAD && + cast<LoadSDNode>(Use)->getBasePtr() == Ptr) || + (Use->getOpcode() == ISD::STORE && + cast<StoreSDNode>(Use)->getBasePtr() == Ptr))) + RealUse = true; + } + + if (!RealUse) + return false; + + SDValue Result; + if (isLoad) + Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + else + Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + ++PreIndexedNodes; + ++NodesCombined; + DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); + DOUT << '\n'; + WorkListRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), + &DeadNodes); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), + &DeadNodes); + } + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + + // Replace the uses of Ptr with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0), + &DeadNodes); + removeFromWorkList(Ptr.getNode()); + DAG.DeleteNode(Ptr.getNode()); + + return true; +} + +/// CombineToPostIndexedLoadStore - Try to combine a load / store with a +/// add / sub of the base pointer node into a post-indexed load / store. +/// The transformation folded the add / subtract into the new indexed +/// load / store effectively and all of its uses are redirected to the +/// new load / store. 
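+// Post-indexing is the complementary pattern: the access uses the old base
+// and the pointer is updated afterwards (e.g. ARM's "ldr r0, [r1], #4"), so
+// here the folded add / sub is a separate *user* of the load / store's base
+// pointer rather than the pointer operand itself.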
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { + if (!LegalOperations) + return false; + + bool isLoad = true; + SDValue Ptr; + MVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + if (Ptr.getNode()->hasOneUse()) + return false; + + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), + E = Ptr.getNode()->use_end(); I != E; ++I) { + SDNode *Op = *I; + if (Op == N || + (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) + continue; + + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { + if (Ptr == Offset) + std::swap(BasePtr, Offset); + if (Ptr != BasePtr) + continue; + // Don't create a indexed load / store with zero offset. + if (isa<ConstantSDNode>(Offset) && + cast<ConstantSDNode>(Offset)->isNullValue()) + continue; + + // Try turning it into a post-indexed load / store except when + // 1) All uses are load / store ops that use it as base ptr. + // 2) Op must be independent of N, i.e. Op is neither a predecessor + // nor a successor of N. Otherwise, if Op is folded that would + // create a cycle. + + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + continue; + + // Check for #1. + bool TryNext = false; + for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(), + EE = BasePtr.getNode()->use_end(); II != EE; ++II) { + SDNode *Use = *II; + if (Use == Ptr.getNode()) + continue; + + // If all the uses are load / store addresses, then don't do the + // transformation. + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + bool RealUse = false; + for (SDNode::use_iterator III = Use->use_begin(), + EEE = Use->use_end(); III != EEE; ++III) { + SDNode *UseUse = *III; + if (!((UseUse->getOpcode() == ISD::LOAD && + cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) || + (UseUse->getOpcode() == ISD::STORE && + cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use))) + RealUse = true; + } + + if (!RealUse) { + TryNext = true; + break; + } + } + } + + if (TryNext) + continue; + + // Check for #2 + if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { + SDValue Result = isLoad + ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM) + : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + ++PostIndexedNodes; + ++NodesCombined; + DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); + DOUT << '\n'; + WorkListRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), + &DeadNodes); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), + &DeadNodes); + } + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + + // Replace the uses of Use with uses of the updated base value. 
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), + Result.getValue(isLoad ? 1 : 0), + &DeadNodes); + removeFromWorkList(Op); + DAG.DeleteNode(Op); + return true; + } + } + } + + return false; +} + +/// InferAlignment - If we can infer some alignment information from this +/// pointer, return it. +static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) { + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. + int FrameIdx = 1 << 31; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (Ptr.getOpcode() == ISD::ADD && + isa<ConstantSDNode>(Ptr.getOperand(1)) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != (1 << 31)) { + // FIXME: Handle FI+CST. + const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); + if (MFI.isFixedObjectIndex(FrameIdx)) { + int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; + + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. + unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = MinAlign(ObjectOffset, StackAlign); + + // Finally, the frame object itself may have a known alignment. Factor + // the alignment + offset into a new alignment. For example, if we know + // the FI is 8 byte aligned, but the pointer is 4 off, we really have a + // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte + // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. + unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + return std::max(Align, FIInfoAlign); + } + } + + return 0; +} + +SDValue DAGCombiner::visitLOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + // Try to infer better alignment information than the load already has. + if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (Align > LD->getAlignment()) + return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + LD->getValueType(0), + Chain, Ptr, LD->getSrcValue(), + LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->isVolatile(), Align); + } + } + + // If load is not volatile and there are no uses of the loaded value (and + // the updated indexed value in case of indexed loads), change uses of the + // chain value into uses of the chain input (i.e. delete the dead load). + if (!LD->isVolatile()) { + if (N->getValueType(1) == MVT::Other) { + // Unindexed loads. + if (N->hasNUsesOfValue(0, 0)) { + // It's not safe to use the two value CombineTo variant here. e.g. + // v1, chain2 = load chain1, loc + // v2, chain3 = load chain2, loc + // v3 = add v2, c + // Now we replace use of chain2 with chain1. This makes the second load + // isomorphic to the one we are deleting, and thus makes this load live. 
+ DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG)); + DOUT << "\n"; + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); + + if (N->use_empty()) { + removeFromWorkList(N); + DAG.DeleteNode(N); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } else { + // Indexed loads. + assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); + if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { + SDValue Undef = DAG.getUNDEF(N->getValueType(0)); + DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG)); + DOUT << " and 2 other values\n"; + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), + DAG.getUNDEF(N->getValueType(1)), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + + // If this load is directly stored, replace the load value with the stored + // value. + // TODO: Handle store large -> read small portion. + // TODO: Handle TRUNCSTORE/LOADEXT + if (LD->getExtensionType() == ISD::NON_EXTLOAD && + !LD->isVolatile()) { + if (ISD::isNON_TRUNCStore(Chain.getNode())) { + StoreSDNode *PrevST = cast<StoreSDNode>(Chain); + if (PrevST->getBasePtr() == Ptr && + PrevST->getValue().getValueType() == N->getValueType(0)) + return CombineTo(N, Chain.getOperand(1), Chain); + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + SDValue ReplLoad; + + // Replace the chain to void dependency. + if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), + BetterChain, Ptr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); + } else { + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + LD->getValueType(0), + BetterChain, Ptr, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->getMemoryVT(), + LD->isVolatile(), + LD->getAlignment()); + } + + // Create token factor to keep old chain connected. + SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, Chain, ReplLoad.getValue(1)); + + // Replace uses with load result and token factor. Don't add users + // to work list. + return CombineTo(N, ReplLoad.getValue(0), Token, false); + } + } + + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + return SDValue(); +} + + +/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is +/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some +/// of the loaded bits, try narrowing the load and store if it would end up +/// being a win for performance or code size. 
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + if (ST->isVolatile()) + return SDValue(); + + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + MVT VT = Value.getValueType(); + + if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) + return SDValue(); + + unsigned Opc = Value.getOpcode(); + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || + Value.getOperand(1).getOpcode() != ISD::Constant) + return SDValue(); + + SDValue N0 = Value.getOperand(0); + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LD = cast<LoadSDNode>(N0); + if (LD->getBasePtr() != Ptr) + return SDValue(); + + // Find the type to narrow it the load / op / store to. + SDValue N1 = Value.getOperand(1); + unsigned BitWidth = N1.getValueSizeInBits(); + APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); + if (Opc == ISD::AND) + Imm ^= APInt::getAllOnesValue(BitWidth); + if (Imm == 0 || Imm.isAllOnesValue()) + return SDValue(); + unsigned ShAmt = Imm.countTrailingZeros(); + unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; + unsigned NewBW = NextPowerOf2(MSB - ShAmt); + MVT NewVT = MVT::getIntegerVT(NewBW); + while (NewBW < BitWidth && + !(TLI.isOperationLegalOrCustom(Opc, NewVT) && + TLI.isNarrowingProfitable(VT, NewVT))) { + NewBW = NextPowerOf2(NewBW); + NewVT = MVT::getIntegerVT(NewBW); + } + if (NewBW >= BitWidth) + return SDValue(); + + // If the lsb changed does not start at the type bitwidth boundary, + // start at the previous one. + if (ShAmt % NewBW) + ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; + APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW); + if ((Imm & Mask) == Imm) { + APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); + if (Opc == ISD::AND) + NewImm ^= APInt::getAllOnesValue(NewBW); + uint64_t PtrOff = ShAmt / 8; + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (TLI.isBigEndian()) + PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + + unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); + if (NewAlign < + TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT())) + return SDValue(); + + SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), + Ptr.getValueType(), Ptr, + DAG.getConstant(PtrOff, Ptr.getValueType())); + SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), + LD->getChain(), NewPtr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), NewAlign); + SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, + DAG.getConstant(NewImm, NewVT)); + SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), + NewVal, NewPtr, + ST->getSrcValue(), ST->getSrcValueOffset(), + false, NewAlign); + + AddToWorkList(NewPtr.getNode()); + AddToWorkList(NewLD.getNode()); + AddToWorkList(NewVal.getNode()); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1), + &DeadNodes); + ++OpsNarrowed; + return NewST; + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSTORE(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + + // Try to infer better alignment information than the store already has. 
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (Align > ST->getAlignment()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), Align); + } + } + + // If this is a store of a bit convert, store the input value if the + // resultant store does not need a higher alignment than the original. + if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && + ST->isUnindexed()) { + unsigned OrigAlign = ST->getAlignment(); + MVT SVT = Value.getOperand(0).getValueType(); + unsigned Align = TLI.getTargetData()-> + getABITypeAlignment(SVT.getTypeForMVT()); + if (Align <= OrigAlign && + ((!LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) + return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign); + } + + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. + if (Value.getOpcode() != ISD::TargetConstantFP) { + SDValue Tmp; + switch (CFP->getValueType(0).getSimpleVT()) { + default: assert(0 && "Unknown FP type"); + case MVT::f80: // We don't do this for these yet. + case MVT::f128: + case MVT::ppcf128: + break; + case MVT::f32: + if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue(), MVT::i32); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->getAlignment()); + } + break; + case MVT::f64: + if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), MVT::i64); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->getAlignment()); + } else if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. 
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + + SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), + isVolatile, ST->getAlignment()); + Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, + DAG.getConstant(4, Ptr.getValueType())); + SVOffset += 4; + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, + Ptr, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + St0, St1); + } + + break; + } + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + // Replace the chain to avoid dependency. + SDValue ReplStore; + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getSrcValue(),ST->getSrcValueOffset(), + ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + } else { + ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->isVolatile(), ST->getAlignment()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, Chain, ReplStore); + + // Don't add users to work list. + return CombineTo(N, Token, false); + } + } + + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + // FIXME: is there such a thing as a truncating indexed store? + if (ST->isTruncatingStore() && ST->isUnindexed() && + Value.getValueType().isInteger()) { + // See if we can simplify the input to this truncstore with knowledge that + // only the low bits are being used. For example: + // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + SDValue Shorter = + GetDemandedBits(Value, + APInt::getLowBitsSet(Value.getValueSizeInBits(), + ST->getMemoryVT().getSizeInBits())); + AddToWorkList(Value.getNode()); + if (Shorter.getNode()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + + // Otherwise, see if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. + if (SimplifyDemandedBits(Value, + APInt::getLowBitsSet( + Value.getValueSizeInBits(), + ST->getMemoryVT().getSizeInBits()))) + return SDValue(N, 0); + } + + // If this is a load followed by a store to the same location, then the store + // is dead/noop. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { + if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && + ST->isUnindexed() && !ST->isVolatile() && + // There can't be any side effects between the load and store, such as + // a call or store. + Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { + // The store is dead, remove it. + return Chain; + } + } + + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a + // truncating store. 
We can do this even if this is already a truncstore. + if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) + && Value.getNode()->hasOneUse() && ST->isUnindexed() && + TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), + ST->getMemoryVT())) { + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + } + + return ReduceLoadOpStoreWidth(N); +} + +SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { + SDValue InVec = N->getOperand(0); + SDValue InVal = N->getOperand(1); + SDValue EltNo = N->getOperand(2); + + // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new + // vector with the inserted element. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(), + InVec.getNode()->op_end()); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } + // If the invec is an UNDEF and if EltNo is a constant, create a new + // BUILD_VECTOR with undef elements and the inserted element. + if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && + isa<ConstantSDNode>(EltNo)) { + MVT VT = InVec.getValueType(); + MVT EVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT)); + + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } + return SDValue(); +} + +SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + // (vextract (scalar_to_vector val, 0) -> val + SDValue InVec = N->getOperand(0); + + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. + MVT EltVT = InVec.getValueType().getVectorElementType(); + SDValue InOp = InVec.getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp); + return InOp; + } + + // Perform only after legalization to ensure build_vector / vector_shuffle + // optimizations have already been done. 
+ if (!LegalOperations) return SDValue(); + + // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) + SDValue EltNo = N->getOperand(1); + + if (isa<ConstantSDNode>(EltNo)) { + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + bool NewLoad = false; + bool BCNumEltsChanged = false; + MVT VT = InVec.getValueType(); + MVT EVT = VT.getVectorElementType(); + MVT LVT = EVT; + + if (InVec.getOpcode() == ISD::BIT_CONVERT) { + MVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + EVT = BCVT.getVectorElementType(); + NewLoad = true; + } + + LoadSDNode *LN0 = NULL; + const ShuffleVectorSDNode *SVN = NULL; + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast<LoadSDNode>(InVec); + } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && + InVec.getOperand(0).getValueType() == EVT && + ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); + } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { + // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) + // => + // (load $addr+1*size) + + // If the bit convert changed the number of elements, it is unsafe + // to examine the mask. + if (BCNumEltsChanged) + return SDValue(); + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt); + InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); + + if (InVec.getOpcode() == ISD::BIT_CONVERT) + InVec = InVec.getOperand(0); + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast<LoadSDNode>(InVec); + Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems; + } + } + + if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile()) + return SDValue(); + + unsigned Align = LN0->getAlignment(); + if (NewLoad) { + // Check the resultant load doesn't need a higher alignment than the + // original load. + unsigned NewAlign = + TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT()); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) + return SDValue(); + + Align = NewAlign; + } + + SDValue NewPtr = LN0->getBasePtr(); + if (Elt) { + unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; + MVT PtrType = NewPtr.getValueType(); + if (TLI.isBigEndian()) + PtrOff = VT.getSizeInBits() / 8 - PtrOff; + NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, + DAG.getConstant(PtrOff, PtrType)); + } + + return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), Align); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + unsigned NumInScalars = N->getNumOperands(); + MVT VT = N->getValueType(0); + MVT EltType = VT.getVectorElementType(); + + // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT + // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from + // at most two distinct vectors, turn this into a shuffle node. + SDValue VecIn1, VecIn2; + for (unsigned i = 0; i != NumInScalars; ++i) { + // Ignore undef inputs. 
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + // If this input is something other than a EXTRACT_VECTOR_ELT with a + // constant index, bail out. + if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { + VecIn1 = VecIn2 = SDValue(0, 0); + break; + } + + // If the input vector type disagrees with the result of the build_vector, + // we can't make a shuffle. + SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); + if (ExtractedFromVec.getValueType() != VT) { + VecIn1 = VecIn2 = SDValue(0, 0); + break; + } + + // Otherwise, remember this. We allow up to two distinct input vectors. + if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) + continue; + + if (VecIn1.getNode() == 0) { + VecIn1 = ExtractedFromVec; + } else if (VecIn2.getNode() == 0) { + VecIn2 = ExtractedFromVec; + } else { + // Too many inputs. + VecIn1 = VecIn2 = SDValue(0, 0); + break; + } + } + + // If everything is good, we can make a shuffle operation. + if (VecIn1.getNode()) { + SmallVector<int, 8> Mask; + for (unsigned i = 0; i != NumInScalars; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + // If extracting from the first vector, just use the index directly. + SDValue Extract = N->getOperand(i); + SDValue ExtVal = Extract.getOperand(1); + if (Extract.getOperand(0) == VecIn1) { + unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); + if (ExtIndex > VT.getVectorNumElements()) + return SDValue(); + + Mask.push_back(ExtIndex); + continue; + } + + // Otherwise, use InIdx + VecSize + unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); + Mask.push_back(Idx+NumInScalars); + } + + // Add count and size info. + if (!TLI.isTypeLegal(VT) && LegalTypes) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + SDValue Ops[2]; + Ops[0] = VecIn1; + Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { + // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of + // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector + // inputs come from at most two distinct vectors, turn this into a shuffle + // node. + + // If we only have one input vector, we don't need to do any concatenation. + if (N->getNumOperands() == 1) + return N->getOperand(0); + + return SDValue(); +} + +SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { + return SDValue(); + + MVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + assert(N0.getValueType().getVectorNumElements() == NumElts && + "Vector shuffle must be normalized in DAG"); + + // FIXME: implement canonicalizations from DAG.getVectorShuffle() + + // If it is a splat, check if the argument vector is a build_vector with + // all scalar elements the same. + if (cast<ShuffleVectorSDNode>(N)->isSplat()) { + SDNode *V = N0.getNode(); + + + // If this is a bit convert that changes the element type of the vector but + // not the number of vector elements, look through it. Be careful not to + // look though conversions that change things like v4f32 to v2f64. 
+ if (V->getOpcode() == ISD::BIT_CONVERT) { + SDValue ConvInput = V->getOperand(0); + if (ConvInput.getValueType().isVector() && + ConvInput.getValueType().getVectorNumElements() == NumElts) + V = ConvInput.getNode(); + } + + if (V->getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElems = V->getNumOperands(); + unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex(); + if (NumElems > BaseIdx) { + SDValue Base; + bool AllSame = true; + for (unsigned i = 0; i != NumElems; ++i) { + if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + Base = V->getOperand(i); + break; + } + } + // Splat of <u, u, u, u>, return <u, u, u, u> + if (!Base.getNode()) + return N0; + for (unsigned i = 0; i != NumElems; ++i) { + if (V->getOperand(i) != Base) { + AllSame = false; + break; + } + } + // Splat of <x, x, x, x>, return <x, x, x, x> + if (AllSame) + return N0; + } + } + } + return SDValue(); +} + +/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform +/// an AND to a vector_shuffle with the destination vector and a zero vector. +/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> +/// vector_shuffle V, Zero, <0, 4, 2, 4> +SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (N->getOpcode() == ISD::AND) { + if (RHS.getOpcode() == ISD::BIT_CONVERT) + RHS = RHS.getOperand(0); + if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<int, 8> Indices; + unsigned NumElts = RHS.getNumOperands(); + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Elt = RHS.getOperand(i); + if (!isa<ConstantSDNode>(Elt)) + return SDValue(); + else if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + Indices.push_back(i); + else if (cast<ConstantSDNode>(Elt)->isNullValue()) + Indices.push_back(NumElts); + else + return SDValue(); + } + + // Let's see if the target supports this vector_shuffle. + MVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + MVT EVT = RVT.getVectorElementType(); + SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, EVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + RVT, &ZeroOps[0], ZeroOps.size()); + LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf); + } + } + + return SDValue(); +} + +/// SimplifyVBinOp - Visit a binary vector operation, like ADD. +SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { + // After legalize, the target may be depending on adds and other + // binary ops to provide legal ways to construct constants or other + // things. Simplifying them may result in a loss of legality. + if (LegalOperations) return SDValue(); + + MVT VT = N->getValueType(0); + assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); + + MVT EltType = VT.getVectorElementType(); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Shuffle = XformToShuffleWithZero(N); + if (Shuffle.getNode()) return Shuffle; + + // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold + // this operation. 
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR && + RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + SDValue LHSOp = LHS.getOperand(i); + SDValue RHSOp = RHS.getOperand(i); + // If these two elements can't be folded, bail out. + if ((LHSOp.getOpcode() != ISD::UNDEF && + LHSOp.getOpcode() != ISD::Constant && + LHSOp.getOpcode() != ISD::ConstantFP) || + (RHSOp.getOpcode() != ISD::UNDEF && + RHSOp.getOpcode() != ISD::Constant && + RHSOp.getOpcode() != ISD::ConstantFP)) + break; + + // Can't fold divide by zero. + if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || + N->getOpcode() == ISD::FDIV) { + if ((RHSOp.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) || + (RHSOp.getOpcode() == ISD::ConstantFP && + cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero())) + break; + } + + Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), + EltType, LHSOp, RHSOp)); + AddToWorkList(Ops.back().getNode()); + assert((Ops.back().getOpcode() == ISD::UNDEF || + Ops.back().getOpcode() == ISD::Constant || + Ops.back().getOpcode() == ISD::ConstantFP) && + "Scalar binop didn't fold!"); + } + + if (Ops.size() == LHS.getNumOperands()) { + MVT VT = LHS.getValueType(); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, + SDValue N1, SDValue N2){ + assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); + + SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + + // If we got a simplified select_cc node back from SimplifySelectCC, then + // break it down into a new SETCC node, and a new SELECT node, and then return + // the SELECT node, since we were called with a SELECT node. + if (SCC.getNode()) { + // Check to see if we got a select_cc back (to turn into setcc/select). + // Otherwise, just return whatever node we got back, like fabs. + if (SCC.getOpcode() == ISD::SELECT_CC) { + SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), + N0.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), + SCC.getOperand(4)); + AddToWorkList(SETCC.getNode()); + return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); + } + + return SCC; + } + return SDValue(); +} + +/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS +/// are the two values being selected between, see if we can simplify the +/// select. Callers of this should assume that TheSelect is deleted if this +/// returns true. As such, they should return the appropriate thing (e.g. the +/// node) back to the top-level of the DAG combiner loop to avoid it being +/// looked at. +bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, + SDValue RHS) { + + // If this is a select from two identical things, try to pull the operation + // through the select. + if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){ + // If this is a load and the token chain is identical, replace the select + // of two loads with a load through a select of the address to load from. + // This triggers in things like "select bool X, 10.0, 123.0" after the FP + // constants have been dropped into the constant pool. 
+ if (LHS.getOpcode() == ISD::LOAD && + // Do not let this transformation reduce the number of volatile loads. + !cast<LoadSDNode>(LHS)->isVolatile() && + !cast<LoadSDNode>(RHS)->isVolatile() && + // Token chains must be identical. + LHS.getOperand(0) == RHS.getOperand(0)) { + LoadSDNode *LLD = cast<LoadSDNode>(LHS); + LoadSDNode *RLD = cast<LoadSDNode>(RHS); + + // If this is an EXTLOAD, the VT's must match. + if (LLD->getMemoryVT() == RLD->getMemoryVT()) { + // FIXME: this conflates two src values, discarding one. This is not + // the right thing to do, but nothing uses srcvalues now. When they do, + // turn SrcValue into a list of locations. + SDValue Addr; + if (TheSelect->getOpcode() == ISD::SELECT) { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) { + Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } + } else { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) { + Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + } + + if (Addr.getNode()) { + SDValue Load; + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { + Load = DAG.getLoad(TheSelect->getValueType(0), + TheSelect->getDebugLoc(), + LLD->getChain(), + Addr,LLD->getSrcValue(), + LLD->getSrcValueOffset(), + LLD->isVolatile(), + LLD->getAlignment()); + } else { + Load = DAG.getExtLoad(LLD->getExtensionType(), + TheSelect->getDebugLoc(), + TheSelect->getValueType(0), + LLD->getChain(), Addr, LLD->getSrcValue(), + LLD->getSrcValueOffset(), + LLD->getMemoryVT(), + LLD->isVolatile(), + LLD->getAlignment()); + } + + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. We know the + // old-load value is dead now. + CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); + CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); + return true; + } + } + } + } + + return false; +} + +/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 +/// where 'cond' is the comparison specified by CC. +SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, + ISD::CondCode CC, bool NotExtCompare) { + // (x ? y : y) -> y. 
+ if (N2 == N3) return N2; + + MVT VT = N2.getValueType(); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); + + // Determine if the condition we're dealing with is constant + SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC, DL, false); + if (SCC.getNode()) AddToWorkList(SCC.getNode()); + ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); + + // fold select_cc true, x, y -> x + if (SCCC && !SCCC->isNullValue()) + return N2; + // fold select_cc false, x, y -> y + if (SCCC && SCCC->isNullValue()) + return N3; + + // Check to see if we can simplify the select into an fabs node + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { + // Allow either -0.0 or 0.0 + if (CFP->getValueAPF().isZero()) { + // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs + if ((CC == ISD::SETGE || CC == ISD::SETGT) && + N0 == N2 && N3.getOpcode() == ISD::FNEG && + N2 == N3.getOperand(0)) + return DAG.getNode(ISD::FABS, DL, VT, N0); + + // select (setl[te] X, +/-0.0), fneg(X), X -> fabs + if ((CC == ISD::SETLT || CC == ISD::SETLE) && + N0 == N3 && N2.getOpcode() == ISD::FNEG && + N2.getOperand(0) == N3) + return DAG.getNode(ISD::FABS, DL, VT, N3); + } + } + + // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" + // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 + // in it. This is a win when the constant is not otherwise available because + // it replaces two constant pool loads with one. We only do this if the FP + // type is known to be legal, because if it isn't, then we are before legalize + // types an we want the other legalization to happen first (e.g. to avoid + // messing with soft float) and if the ConstantFP is not legal, because if + // it is legal, we may not need to store the FP constant in a constant pool. + if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) + if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { + if (TLI.isTypeLegal(N2.getValueType()) && + (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != + TargetLowering::Legal) && + // If both constants have multiple uses, then we won't need to do an + // extra load, they are likely around in registers for other users. + (TV->hasOneUse() || FV->hasOneUse())) { + Constant *Elts[] = { + const_cast<ConstantFP*>(FV->getConstantFPValue()), + const_cast<ConstantFP*>(TV->getConstantFPValue()) + }; + const Type *FPTy = Elts[0]->getType(); + const TargetData &TD = *TLI.getTargetData(); + + // Create a ConstantArray of the two constants. + Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2); + SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), + TD.getPrefTypeAlignment(FPTy)); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + + // Get the offsets to the 0 and 1 element of the array so that we can + // select between them. 
+ SDValue Zero = DAG.getIntPtrConstant(0); + unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); + SDValue One = DAG.getIntPtrConstant(EltSize); + + SDValue Cond = DAG.getSetCC(DL, + TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC); + SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), + Cond, One, Zero); + CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, + CstOffset); + return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, false, + Alignment); + + } + } + + // Check to see if we can perform the "gzip trick", transforming + // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) + if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && + N0.getValueType().isInteger() && + N2.getValueType().isInteger() && + (N1C->isNullValue() || // (a < 0) ? b : 0 + (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 + MVT XType = N0.getValueType(); + MVT AType = N2.getValueType(); + if (XType.bitsGE(AType)) { + // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a + // single-bit constant. + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { + unsigned ShCtV = N2C->getAPIntValue().logBase2(); + ShCtV = XType.getSizeInBits()-ShCtV-1; + SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy()); + SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), + XType, N0, ShCt); + AddToWorkList(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorkList(Shift.getNode()); + } + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), + XType, N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + AddToWorkList(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorkList(Shift.getNode()); + } + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + } + + // fold select C, 16, 0 -> shl C, 4 + if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && + TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) { + + // If the caller doesn't want us to simplify this into a zext of a compare, + // don't do it. + if (NotExtCompare && N2C->getAPIntValue() == 1) + return SDValue(); + + // Get a SetCC of the condition + // FIXME: Should probably make sure that setcc is legal if we ever have a + // target where it isn't. + SDValue Temp, SCC; + // cast from setcc result type to select result type + if (LegalTypes) { + SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC); + if (N2.getValueType().bitsLT(SCC.getValueType())) + Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType()); + else + Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + N2.getValueType(), SCC); + } else { + SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); + Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + N2.getValueType(), SCC); + } + + AddToWorkList(SCC.getNode()); + AddToWorkList(Temp.getNode()); + + if (N2C->getAPIntValue() == 1) + return Temp; + + // shl setcc result by log2 n2c + return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy())); + } + + // Check to see if this is the equivalent of setcc + // FIXME: Turn all of these into setcc if setcc if setcc is legal + // otherwise, go ahead with the folds. 
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { + MVT XType = N0.getValueType(); + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { + SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); + if (Res.getValueType() != VT) + Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); + return Res; + } + + // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) + if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && + (!LegalOperations || + TLI.isOperationLegal(ISD::CTLZ, XType))) { + SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); + return DAG.getNode(ISD::SRL, DL, XType, Ctlz, + DAG.getConstant(Log2_32(XType.getSizeInBits()), + getShiftAmountTy())); + } + // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) + if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { + SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(), + XType, DAG.getConstant(0, XType), N0); + SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType); + return DAG.getNode(ISD::SRL, DL, XType, + DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + } + // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) + if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { + SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); + } + } + + // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && + N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && + N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { + MVT XType = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType, + N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); + } + // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && + N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { + if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) { + MVT XType = N0.getValueType(); + if (SubC->isNullValue() && XType.isInteger()) { + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + XType, N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); + } + } + } + + return SDValue(); +} + +/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. 
+SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0, + SDValue N1, ISD::CondCode Cond, + DebugLoc DL, bool foldBooleans) { + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, Level == Unrestricted, false, this); + return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); +} + +/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. See: +/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +SDValue DAGCombiner::BuildSDIV(SDNode *N) { + std::vector<SDNode*> Built; + SDValue S = TLI.BuildSDIV(N, DAG, &Built); + + for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); + ii != ee; ++ii) + AddToWorkList(*ii); + return S; +} + +/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. See: +/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +SDValue DAGCombiner::BuildUDIV(SDNode *N) { + std::vector<SDNode*> Built; + SDValue S = TLI.BuildUDIV(N, DAG, &Built); + + for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); + ii != ee; ++ii) + AddToWorkList(*ii); + return S; +} + +/// FindBaseOffset - Return true if base is known not to alias with anything +/// but itself. Provides base object and offset as results. +static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) { + // Assume it is a primitive operation. + Base = Ptr; Offset = 0; + + // If it's an adding a simple constant then integrate the offset. + if (Base.getOpcode() == ISD::ADD) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { + Base = Base.getOperand(0); + Offset += C->getZExtValue(); + } + } + + // If it's any of the following then it can't alias with anything but itself. + return isa<FrameIndexSDNode>(Base) || + isa<ConstantPoolSDNode>(Base) || + isa<GlobalAddressSDNode>(Base); +} + +/// isAlias - Return true if there is any possibility that the two addresses +/// overlap. +bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, + const Value *SrcValue1, int SrcValueOffset1, + SDValue Ptr2, int64_t Size2, + const Value *SrcValue2, int SrcValueOffset2) const { + // If they are the same then they must be aliases. + if (Ptr1 == Ptr2) return true; + + // Gather base node and offset information. + SDValue Base1, Base2; + int64_t Offset1, Offset2; + bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1); + bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2); + + // If they have a same base address then... + if (Base1 == Base2) + // Check to see if the addresses overlap. + return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); + + // If we know both bases then they can't alias. + if (KnownBase1 && KnownBase2) return false; + + if (CombinerGlobalAA) { + // Use alias analysis information. + int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); + int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; + int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; + AliasAnalysis::AliasResult AAResult = + AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2); + if (AAResult == AliasAnalysis::NoAlias) + return false; + } + + // Otherwise we have to assume they alias. + return true; +} + +/// FindAliasInfo - Extracts the relevant alias information from the memory +/// node. Returns true if the operand was a load. 
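
The BuildSDIV/BuildUDIV helpers above replace a divide-by-constant with a multiply by a per-target magic number chosen by TLI. A self-checking sketch of the idea for the fixed divisor 3 on 32-bit signed values, using the standard construction from the reference cited above (the constant 0x55555556 belongs to this example only, not to what any particular target emits):

#include <cassert>
#include <cstdint>

// Signed divide by 3 without a divide: multiply by M = ceil(2^32 / 3) =
// 0x55555556, keep the high 32 bits of the 64-bit product (arithmetic shift
// assumed), and add 1 when the dividend is negative to get C's truncating
// quotient.
static int32_t divBy3(int32_t n) {
  int64_t prod = (int64_t)0x55555556 * n;       // multiply by the magic number
  int32_t q = (int32_t)(prod >> 32);            // high half of the product
  q += (int32_t)((uint32_t)n >> 31);            // correction for negative n
  return q;
}

int main() {
  for (int32_t n = -100000; n <= 100000; ++n)
    assert(divBy3(n) == n / 3);
  assert(divBy3(INT32_MAX) == INT32_MAX / 3);
  assert(divBy3(INT32_MIN) == INT32_MIN / 3);
  return 0;
}
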
+bool DAGCombiner::FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset) const { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + Ptr = LD->getBasePtr(); + Size = LD->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LD->getSrcValue(); + SrcValueOffset = LD->getSrcValueOffset(); + return true; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + Ptr = ST->getBasePtr(); + Size = ST->getMemoryVT().getSizeInBits() >> 3; + SrcValue = ST->getSrcValue(); + SrcValueOffset = ST->getSrcValueOffset(); + } else { + assert(0 && "FindAliasInfo expected a memory operand"); + } + + return false; +} + +/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, +/// looking for aliasing nodes and adding them to the Aliases vector. +void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector<SDValue, 8> &Aliases) { + SmallVector<SDValue, 8> Chains; // List of chains to visit. + std::set<SDNode *> Visited; // Visited node set. + + // Get alias information for node. + SDValue Ptr; + int64_t Size = 0; + const Value *SrcValue = 0; + int SrcValueOffset = 0; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset); + + // Starting off. + Chains.push_back(OriginalChain); + + // Look at each chain and determine if it is an alias. If so, add it to the + // aliases list. If not, then continue up the chain looking for the next + // candidate. + while (!Chains.empty()) { + SDValue Chain = Chains.back(); + Chains.pop_back(); + + // Don't bother if we've been before. + if (Visited.find(Chain.getNode()) != Visited.end()) continue; + Visited.insert(Chain.getNode()); + + switch (Chain.getOpcode()) { + case ISD::EntryToken: + // Entry token is ideal chain operand, but handled in FindBetterChain. + break; + + case ISD::LOAD: + case ISD::STORE: { + // Get alias information for Chain. + SDValue OpPtr; + int64_t OpSize = 0; + const Value *OpSrcValue = 0; + int OpSrcValueOffset = 0; + bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, + OpSrcValue, OpSrcValueOffset); + + // If chain is alias then stop here. + if (!(IsLoad && IsOpLoad) && + isAlias(Ptr, Size, SrcValue, SrcValueOffset, + OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) { + Aliases.push_back(Chain); + } else { + // Look further up the chain. + Chains.push_back(Chain.getOperand(0)); + // Clean up old chain. + AddToWorkList(Chain.getNode()); + } + break; + } + + case ISD::TokenFactor: + // We have to check each of the operands of the token factor, so we queue + // then up. Adding the operands to the queue (stack) in reverse order + // maintains the original order and increases the likelihood that getNode + // will find a matching token factor (CSE.) + for (unsigned n = Chain.getNumOperands(); n;) + Chains.push_back(Chain.getOperand(--n)); + // Eliminate the token factor if we can. + AddToWorkList(Chain.getNode()); + break; + + default: + // For all other instructions we will just have to take what we can get. + Aliases.push_back(Chain); + break; + } + } +} + +/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking +/// for a better chain (aliasing node.) +SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { + SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. + + // Accumulate all the aliases to this node. + GatherAllAliases(N, OldChain, Aliases); + + if (Aliases.size() == 0) { + // If no operands then chain to entry token. 
+ return DAG.getEntryNode(); + } else if (Aliases.size() == 1) { + // If a single operand then chain to it. We don't need to revisit it. + return Aliases[0]; + } + + // Construct a custom tailored token factor. + SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + &Aliases[0], Aliases.size()); + + // Make sure the old chain gets cleaned up. + if (NewChain != OldChain) AddToWorkList(OldChain.getNode()); + + return NewChain; +} + +// SelectionDAG::Combine - This is the entry point for the file. +// +void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, + CodeGenOpt::Level OptLevel) { + /// run - This is the main entry point to this class. + /// + DAGCombiner(*this, AA, OptLevel).Run(Level); +} diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp new file mode 100644 index 000000000000..6becff32176e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -0,0 +1,1033 @@ +///===-- FastISel.cpp - Implementation of the FastISel class --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the FastISel class. +// +// "Fast" instruction selection is designed to emit very poor code quickly. +// Also, it is not designed to be able to do much lowering, so most illegal +// types (e.g. i64 on 32-bit targets) and operations are not supported. It is +// also not intended to be able to do much optimization, except in a few cases +// where doing optimizations reduces overall compile time. For example, folding +// constants into immediate fields is often done, because it's cheap and it +// reduces the number of instructions later phases have to examine. +// +// "Fast" instruction selection is able to fail gracefully and transfer +// control to the SelectionDAG selector for operations that it doesn't +// support. In many cases, this allows us to avoid duplicating a lot of +// the complicated lowering logic that SelectionDAG currently has. +// +// The intended use for "fast" instruction selection is "-O0" mode +// compilation, where the quality of the generated code is irrelevant when +// weighed against the speed at which the code can be generated. Also, +// at -O0, the LLVM optimizers are not running, and this makes the +// compile time of codegen a much higher portion of the overall compile +// time. Despite its limitations, "fast" instruction selection is able to +// handle enough code on its own to provide noticeable overall speedups +// in -O0 compiles. +// +// Basic operations are supported in a target-independent way, by reading +// the same instruction descriptions that the SelectionDAG selector reads, +// and identifying simple arithmetic operations that can be directly selected +// from simple operators. More complicated operations currently require +// target-specific code. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/DebugLoc.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "SelectionDAGBuild.h" +using namespace llvm; + +unsigned FastISel::getRegForValue(Value *V) { + MVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); + // Don't handle non-simple values in FastISel. + if (!RealVT.isSimple()) + return 0; + + // Ignore illegal types. We must do this before looking up the value + // in ValueMap because Arguments are given virtual registers regardless + // of whether FastISel can handle them. + MVT::SimpleValueType VT = RealVT.getSimpleVT(); + if (!TLI.isTypeLegal(VT)) { + // Promote MVT::i1 to a legal type though, because it's common and easy. + if (VT == MVT::i1) + VT = TLI.getTypeToTransformTo(VT).getSimpleVT(); + else + return 0; + } + + // Look up the value to see if we already have a register for it. We + // cache values defined by Instructions across blocks, and other values + // only locally. This is because Instructions already have the SSA + // def-dominatess-use requirement enforced. + if (ValueMap.count(V)) + return ValueMap[V]; + unsigned Reg = LocalValueMap[V]; + if (Reg != 0) + return Reg; + + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getValue().getActiveBits() <= 64) + Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + } else if (isa<AllocaInst>(V)) { + Reg = TargetMaterializeAlloca(cast<AllocaInst>(V)); + } else if (isa<ConstantPointerNull>(V)) { + // Translate this as an integer zero so that it can be + // local-CSE'd with actual integer zeros. + Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType())); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + + if (!Reg) { + const APFloat &Flt = CF->getValueAPF(); + MVT IntVT = TLI.getPointerTy(); + + uint64_t x[2]; + uint32_t IntBitWidth = IntVT.getSizeInBits(); + bool isExact; + (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); + if (isExact) { + APInt IntVal(IntBitWidth, 2, x); + + unsigned IntegerReg = getRegForValue(ConstantInt::get(IntVal)); + if (IntegerReg != 0) + Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg); + } + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (!SelectOperator(CE, CE->getOpcode())) return 0; + Reg = LocalValueMap[CE]; + } else if (isa<UndefValue>(V)) { + Reg = createResultReg(TLI.getRegClassFor(VT)); + BuildMI(MBB, DL, TII.get(TargetInstrInfo::IMPLICIT_DEF), Reg); + } + + // If target-independent code couldn't handle the value, give target-specific + // code a try. + if (!Reg && isa<Constant>(V)) + Reg = TargetMaterializeConstant(cast<Constant>(V)); + + // Don't cache constant materializations in the general ValueMap. + // To do so would require tracking what uses they dominate. 
+ if (Reg != 0) + LocalValueMap[V] = Reg; + return Reg; +} + +unsigned FastISel::lookUpRegForValue(Value *V) { + // Look up the value to see if we already have a register for it. We + // cache values defined by Instructions across blocks, and other values + // only locally. This is because Instructions already have the SSA + // def-dominatess-use requirement enforced. + if (ValueMap.count(V)) + return ValueMap[V]; + return LocalValueMap[V]; +} + +/// UpdateValueMap - Update the value map to include the new mapping for this +/// instruction, or insert an extra copy to get the result in a previous +/// determined register. +/// NOTE: This is only necessary because we might select a block that uses +/// a value before we select the block that defines the value. It might be +/// possible to fix this by selecting blocks in reverse postorder. +unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) { + if (!isa<Instruction>(I)) { + LocalValueMap[I] = Reg; + return Reg; + } + + unsigned &AssignedReg = ValueMap[I]; + if (AssignedReg == 0) + AssignedReg = Reg; + else if (Reg != AssignedReg) { + const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); + TII.copyRegToReg(*MBB, MBB->end(), AssignedReg, + Reg, RegClass, RegClass); + } + return AssignedReg; +} + +unsigned FastISel::getRegForGEPIndex(Value *Idx) { + unsigned IdxN = getRegForValue(Idx); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return 0; + + // If the index is smaller or larger than intptr_t, truncate or extend it. + MVT PtrVT = TLI.getPointerTy(); + MVT IdxVT = MVT::getMVT(Idx->getType(), /*HandleUnknown=*/false); + if (IdxVT.bitsLT(PtrVT)) + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(), + ISD::SIGN_EXTEND, IdxN); + else if (IdxVT.bitsGT(PtrVT)) + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(), + ISD::TRUNCATE, IdxN); + return IdxN; +} + +/// SelectBinaryOp - Select and emit code for a binary operator instruction, +/// which has an opcode which directly corresponds to the given ISD opcode. +/// +bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) { + MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true); + if (VT == MVT::Other || !VT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // We only handle legal types. For example, on x86-32 the instruction + // selector contains all of the 64-bit instructions from x86-64, + // under the assumption that i64 won't be used if the target doesn't + // support it. + if (!TLI.isTypeLegal(VT)) { + // MVT::i1 is special. Allow AND, OR, or XOR because they + // don't require additional zeroing, which makes them easy. + if (VT == MVT::i1 && + (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || + ISDOpcode == ISD::XOR)) + VT = TLI.getTypeToTransformTo(VT); + else + return false; + } + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (Op0 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // Check if the second operand is a constant and handle it appropriately. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, CI->getZExtValue()); + if (ResultReg != 0) { + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + } + + // Check if the second operand is a constant float. 
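
The caching rules spelled out above are the backbone of FastISel's value reuse: results of Instructions live in a function-wide map because SSA guarantees their defs dominate their uses, while constants and other locally materialized values live in a per-block map that is discarded between blocks. A minimal standalone sketch of just that lookup order, with hypothetical map names standing in for the real ValueMap/LocalValueMap members:

#include <map>

struct Value {}; // stand-in for llvm::Value; illustration only

static std::map<const Value *, unsigned> InstructionRegs; // cross-block cache
static std::map<const Value *, unsigned> BlockLocalRegs;  // cleared per block

// Mirrors the shape of lookUpRegForValue: cross-block cache first, then the
// block-local one; 0 means "no register has been emitted for this value yet".
unsigned lookUpReg(const Value *V) {
  std::map<const Value *, unsigned>::const_iterator I = InstructionRegs.find(V);
  if (I != InstructionRegs.end())
    return I->second;
  I = BlockLocalRegs.find(V);
  return I != BlockLocalRegs.end() ? I->second : 0;
}
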
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { + unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, CF); + if (ResultReg != 0) { + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + } + + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // Now we have both operands in registers. Emit the instruction. + unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op1); + if (ResultReg == 0) + // Target-specific code wasn't able to find a machine opcode for + // the given ISD opcode and type. Halt "fast" selection and bail. + return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectGetElementPtr(User *I) { + unsigned N = getRegForValue(I->getOperand(0)); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + const Type *Ty = I->getOperand(0)->getType(); + MVT::SimpleValueType VT = TLI.getPointerTy().getSimpleVT(); + for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end(); + OI != E; ++OI) { + Value *Idx = *OI; + if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); + // FIXME: This can be optimized by combining the add with a + // subsequent one. + N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->getZExtValue() == 0) continue; + uint64_t Offs = + TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + continue; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = TD.getTypeAllocSize(Ty); + unsigned IdxN = getRegForGEPIndex(Idx); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + if (ElementSize != 1) { + IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + } + + // We successfully emitted code for the given LLVM Instruction. 
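
To make the loop above concrete, take a GEP over a hypothetical type {i32, [4 x i32]} with indices (0, 1, Idx): the leading pointer index contributes nothing, the struct field is folded into a constant add of its StructLayout offset (4 here), and the array step becomes Idx scaled by the element size. A standalone sketch of that arithmetic with the sizes hard-coded instead of coming from TargetData:

#include <cassert>
#include <cstdint>

// Offset of: getelementptr {i32, [4 x i32]}* %p, i32 0, i32 1, i32 Idx
uint64_t gepOffset(int64_t Idx) {
  uint64_t Offs = 0;
  Offs += 0;       // pointer index 0: no displacement
  Offs += 4;       // struct field 1: constant StructLayout offset, folded as ADD
  Offs += 4 * Idx; // array element: Idx * ElementSize, emitted as MUL + ADD
  return Offs;
}

int main() {
  assert(gepOffset(3) == 16); // 0 + 4 + 3*4
}
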
+ UpdateValueMap(I, N); + return true; +} + +bool FastISel::SelectCall(User *I) { + Function *F = cast<CallInst>(I)->getCalledFunction(); + if (!F) return false; + + unsigned IID = F->getIntrinsicID(); + switch (IID) { + default: break; + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); + if (DIDescriptor::ValidDebugInfo(SPI->getContext(), CodeGenOpt::None)) { + DICompileUnit CU(cast<GlobalVariable>(SPI->getContext())); + unsigned Line = SPI->getLine(); + unsigned Col = SPI->getColumn(); + unsigned Idx = MF.getOrCreateDebugLocID(CU.getGV(), Line, Col); + setCurDebugLoc(DebugLoc::get(Idx)); + } + return true; + } + case Intrinsic::dbg_region_start: { + DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I); + if (DIDescriptor::ValidDebugInfo(RSI->getContext(), CodeGenOpt::None) && + DW && DW->ShouldEmitDwarfDebug()) { + unsigned ID = + DW->RecordRegionStart(cast<GlobalVariable>(RSI->getContext())); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + BuildMI(MBB, DL, II).addImm(ID); + } + return true; + } + case Intrinsic::dbg_region_end: { + DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I); + if (DIDescriptor::ValidDebugInfo(REI->getContext(), CodeGenOpt::None) && + DW && DW->ShouldEmitDwarfDebug()) { + unsigned ID = 0; + DISubprogram Subprogram(cast<GlobalVariable>(REI->getContext())); + if (!Subprogram.isNull() && !Subprogram.describes(MF.getFunction())) { + // This is end of an inlined function. + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + ID = DW->RecordInlinedFnEnd(Subprogram); + if (ID) + // Returned ID is 0 if this is unbalanced "end of inlined + // scope". This could happen if optimizer eats dbg intrinsics + // or "beginning of inlined scope" is not recoginized due to + // missing location info. In such cases, do ignore this region.end. + BuildMI(MBB, DL, II).addImm(ID); + } else { + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + ID = DW->RecordRegionEnd(cast<GlobalVariable>(REI->getContext())); + BuildMI(MBB, DL, II).addImm(ID); + } + } + return true; + } + case Intrinsic::dbg_func_start: { + DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); + Value *SP = FSI->getSubprogram(); + if (!DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::None)) + return true; + + // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what + // (most?) gdb expects. + DebugLoc PrevLoc = DL; + DISubprogram Subprogram(cast<GlobalVariable>(SP)); + DICompileUnit CompileUnit = Subprogram.getCompileUnit(); + + if (!Subprogram.describes(MF.getFunction())) { + // This is a beginning of an inlined function. + + // If llvm.dbg.func.start is seen in a new block before any + // llvm.dbg.stoppoint intrinsic then the location info is unknown. + // FIXME : Why DebugLoc is reset at the beginning of each block ? + if (PrevLoc.isUnknown()) + return true; + // Record the source line. + unsigned Line = Subprogram.getLineNumber(); + setCurDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID( + CompileUnit.getGV(), Line, 0))); + + if (DW && DW->ShouldEmitDwarfDebug()) { + DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); + unsigned LabelID = DW->RecordInlinedFnStart(Subprogram, + DICompileUnit(PrevLocTpl.CompileUnit), + PrevLocTpl.Line, + PrevLocTpl.Col); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + BuildMI(MBB, DL, II).addImm(LabelID); + } + } else { + // Record the source line. 
+ unsigned Line = Subprogram.getLineNumber(); + MF.setDefaultDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID( + CompileUnit.getGV(), Line, 0))); + if (DW && DW->ShouldEmitDwarfDebug()) { + // llvm.dbg.func_start also defines beginning of function scope. + DW->RecordRegionStart(cast<GlobalVariable>(FSI->getSubprogram())); + } + } + + return true; + } + case Intrinsic::dbg_declare: { + DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + Value *Variable = DI->getVariable(); + if (DIDescriptor::ValidDebugInfo(Variable, CodeGenOpt::None) && + DW && DW->ShouldEmitDwarfDebug()) { + // Determine the address of the declared object. + Value *Address = DI->getAddress(); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + AllocaInst *AI = dyn_cast<AllocaInst>(Address); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) break; + DenseMap<const AllocaInst*, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI == StaticAllocaMap.end()) break; // VLAs. + int FI = SI->second; + + // Determine the debug globalvariable. + GlobalValue *GV = cast<GlobalVariable>(Variable); + + // Build the DECLARE instruction. + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE); + MachineInstr *DeclareMI + = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV); + DIVariable DV(cast<GlobalVariable>(GV)); + if (!DV.isNull()) { + // This is a local variable + DW->RecordVariableScope(DV, DeclareMI); + } + } + return true; + } + case Intrinsic::eh_exception: { + MVT VT = TLI.getValueType(I->getType()); + switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { + default: break; + case TargetLowering::Expand: { + assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); + unsigned Reg = TLI.getExceptionAddressRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Reg, RC, RC); + assert(InsertedCopy && "Can't copy address registers!"); + InsertedCopy = InsertedCopy; + UpdateValueMap(I, ResultReg); + return true; + } + } + break; + } + case Intrinsic::eh_selector_i32: + case Intrinsic::eh_selector_i64: { + MVT VT = TLI.getValueType(I->getType()); + switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { + default: break; + case TargetLowering::Expand: { + MVT VT = (IID == Intrinsic::eh_selector_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + if (MBB->isLandingPad()) + AddCatchInfo(*cast<CallInst>(I), MMI, MBB); + else { +#ifndef NDEBUG + CatchInfoLost.insert(cast<CallInst>(I)); +#endif + // FIXME: Mark exception selector register as live in. Hack for PR1508. 
+ unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) MBB->addLiveIn(Reg); + } + + unsigned Reg = TLI.getExceptionSelectorRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Reg, RC, RC); + assert(InsertedCopy && "Can't copy address registers!"); + InsertedCopy = InsertedCopy; + UpdateValueMap(I, ResultReg); + } else { + unsigned ResultReg = + getRegForValue(Constant::getNullValue(I->getType())); + UpdateValueMap(I, ResultReg); + } + return true; + } + } + break; + } + } + return false; +} + +bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the destination type is legal. Or as a special case, + // it may be i1 if we're doing a truncate because that's + // easy and somewhat common. + if (!TLI.isTypeLegal(DstVT)) + if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the source operand is legal. Or as a special case, + // it may be i1 if we're doing zero-extension because that's + // easy and somewhat common. + if (!TLI.isTypeLegal(SrcVT)) + if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND) + // Unhandled type. Halt "fast" selection and bail. + return false; + + unsigned InputReg = getRegForValue(I->getOperand(0)); + if (!InputReg) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // If the operand is i1, arrange for the high bits in the register to be zero. + if (SrcVT == MVT::i1) { + SrcVT = TLI.getTypeToTransformTo(SrcVT); + InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg); + if (!InputReg) + return false; + } + // If the result is i1, truncate to the target's type for i1 first. + if (DstVT == MVT::i1) + DstVT = TLI.getTypeToTransformTo(DstVT); + + unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), + DstVT.getSimpleVT(), + Opcode, + InputReg); + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectBitCast(User *I) { + // If the bitcast doesn't change the type, just use the operand value. + if (I->getType() == I->getOperand(0)->getType()) { + unsigned Reg = getRegForValue(I->getOperand(0)); + if (Reg == 0) + return false; + UpdateValueMap(I, Reg); + return true; + } + + // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple() || + !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT)) + // Unhandled type. Halt "fast" selection and bail. + return false; + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (Op0 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // First, try to perform the bitcast by inserting a reg-reg copy. 
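
The i1 special cases above reduce to one bit of masking: when zero-extending from i1, the high bits of the source register may hold garbage, so SelectCast routes through FastEmitZExtFromI1 (defined at the end of this file), which is simply an AND with the immediate 1. A tiny standalone sketch of the same masking on a plain integer:

#include <cassert>
#include <cstdint>

// Keep only bit 0 of a register that conceptually holds an i1.
uint32_t zextFromI1(uint32_t RegContents) {
  return RegContents & 1; // high bits may hold garbage; clear them
}

int main() {
  assert(zextFromI1(0xFFFFFFFFu) == 1);
  assert(zextFromI1(0xFFFFFFFEu) == 0);
}
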
+ unsigned ResultReg = 0; + if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { + TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + ResultReg = createResultReg(DstClass); + + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Op0, DstClass, SrcClass); + if (!InsertedCopy) + ResultReg = 0; + } + + // If the reg-reg copy failed, select a BIT_CONVERT opcode. + if (!ResultReg) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + ISD::BIT_CONVERT, Op0); + + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool +FastISel::SelectInstruction(Instruction *I) { + return SelectOperator(I, I->getOpcode()); +} + +/// FastEmitBranch - Emit an unconditional branch to the given block, +/// unless it is the immediate (fall-through) successor, and update +/// the CFG. +void +FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { + MachineFunction::iterator NextMBB = + next(MachineFunction::iterator(MBB)); + + if (MBB->isLayoutSuccessor(MSucc)) { + // The unconditional fall-through case, which needs no instructions. + } else { + // The unconditional branch case. + TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>()); + } + MBB->addSuccessor(MSucc); +} + +bool +FastISel::SelectOperator(User *I, unsigned Opcode) { + switch (Opcode) { + case Instruction::Add: { + ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FADD : ISD::ADD; + return SelectBinaryOp(I, Opc); + } + case Instruction::Sub: { + ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FSUB : ISD::SUB; + return SelectBinaryOp(I, Opc); + } + case Instruction::Mul: { + ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FMUL : ISD::MUL; + return SelectBinaryOp(I, Opc); + } + case Instruction::SDiv: + return SelectBinaryOp(I, ISD::SDIV); + case Instruction::UDiv: + return SelectBinaryOp(I, ISD::UDIV); + case Instruction::FDiv: + return SelectBinaryOp(I, ISD::FDIV); + case Instruction::SRem: + return SelectBinaryOp(I, ISD::SREM); + case Instruction::URem: + return SelectBinaryOp(I, ISD::UREM); + case Instruction::FRem: + return SelectBinaryOp(I, ISD::FREM); + case Instruction::Shl: + return SelectBinaryOp(I, ISD::SHL); + case Instruction::LShr: + return SelectBinaryOp(I, ISD::SRL); + case Instruction::AShr: + return SelectBinaryOp(I, ISD::SRA); + case Instruction::And: + return SelectBinaryOp(I, ISD::AND); + case Instruction::Or: + return SelectBinaryOp(I, ISD::OR); + case Instruction::Xor: + return SelectBinaryOp(I, ISD::XOR); + + case Instruction::GetElementPtr: + return SelectGetElementPtr(I); + + case Instruction::Br: { + BranchInst *BI = cast<BranchInst>(I); + + if (BI->isUnconditional()) { + BasicBlock *LLVMSucc = BI->getSuccessor(0); + MachineBasicBlock *MSucc = MBBMap[LLVMSucc]; + FastEmitBranch(MSucc); + return true; + } + + // Conditional branches are not handed yet. + // Halt "fast" selection and bail. + return false; + } + + case Instruction::Unreachable: + // Nothing to emit. + return true; + + case Instruction::PHI: + // PHI nodes are already emitted. + return true; + + case Instruction::Alloca: + // FunctionLowering has the static-sized case covered. + if (StaticAllocaMap.count(cast<AllocaInst>(I))) + return true; + + // Dynamic-sized alloca is not handled yet. 
+ return false; + + case Instruction::Call: + return SelectCall(I); + + case Instruction::BitCast: + return SelectBitCast(I); + + case Instruction::FPToSI: + return SelectCast(I, ISD::FP_TO_SINT); + case Instruction::ZExt: + return SelectCast(I, ISD::ZERO_EXTEND); + case Instruction::SExt: + return SelectCast(I, ISD::SIGN_EXTEND); + case Instruction::Trunc: + return SelectCast(I, ISD::TRUNCATE); + case Instruction::SIToFP: + return SelectCast(I, ISD::SINT_TO_FP); + + case Instruction::IntToPtr: // Deliberate fall-through. + case Instruction::PtrToInt: { + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + if (DstVT.bitsGT(SrcVT)) + return SelectCast(I, ISD::ZERO_EXTEND); + if (DstVT.bitsLT(SrcVT)) + return SelectCast(I, ISD::TRUNCATE); + unsigned Reg = getRegForValue(I->getOperand(0)); + if (Reg == 0) return false; + UpdateValueMap(I, Reg); + return true; + } + + default: + // Unhandled instruction. Halt "fast" selection and bail. + return false; + } +} + +FastISel::FastISel(MachineFunction &mf, + MachineModuleInfo *mmi, + DwarfWriter *dw, + DenseMap<const Value *, unsigned> &vm, + DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, + DenseMap<const AllocaInst *, int> &am +#ifndef NDEBUG + , SmallSet<Instruction*, 8> &cil +#endif + ) + : MBB(0), + ValueMap(vm), + MBBMap(bm), + StaticAllocaMap(am), +#ifndef NDEBUG + CatchInfoLost(cil), +#endif + MF(mf), + MMI(mmi), + DW(dw), + MRI(MF.getRegInfo()), + MFI(*MF.getFrameInfo()), + MCP(*MF.getConstantPool()), + TM(MF.getTarget()), + TD(*TM.getTargetData()), + TII(*TM.getInstrInfo()), + TLI(*TM.getTargetLowering()) { +} + +FastISel::~FastISel() {} + +unsigned FastISel::FastEmit_(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType) { + return 0; +} + +unsigned FastISel::FastEmit_r(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, unsigned /*Op0*/) { + return 0; +} + +unsigned FastISel::FastEmit_rr(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, unsigned /*Op0*/, + unsigned /*Op0*/) { + return 0; +} + +unsigned FastISel::FastEmit_i(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, uint64_t /*Imm*/) { + return 0; +} + +unsigned FastISel::FastEmit_f(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, ConstantFP * /*FPImm*/) { + return 0; +} + +unsigned FastISel::FastEmit_ri(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, unsigned /*Op0*/, + uint64_t /*Imm*/) { + return 0; +} + +unsigned FastISel::FastEmit_rf(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, unsigned /*Op0*/, + ConstantFP * /*FPImm*/) { + return 0; +} + +unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, + unsigned /*Op0*/, unsigned /*Op1*/, + uint64_t /*Imm*/) { + return 0; +} + +/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries +/// to emit an instruction with an immediate operand using FastEmit_ri. +/// If that fails, it materializes the immediate into a register and try +/// FastEmit_rr instead. +unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode, + unsigned Op0, uint64_t Imm, + MVT::SimpleValueType ImmType) { + // First check if immediate type is legal. If not, we can't use the ri form. 
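
The FastEmit_* stubs above are the target hook surface: each base implementation returns 0, which callers such as SelectBinaryOp treat as "no single machine instruction covers this, fall back to the SelectionDAG path", and a target's generated fast-isel code overrides the forms it can handle. A minimal sketch of that convention, using illustrative names rather than the real TableGen-generated overrides:

// Illustrative names only; the real hooks are the FastEmit_* virtuals above.
struct FastEmitterBase {
  virtual ~FastEmitterBase() {}
  // 0 means "cannot select this form"; overrides return a virtual register.
  virtual unsigned emit_rr(unsigned /*Op0*/, unsigned /*Op1*/) { return 0; }
};

struct ToyTargetEmitter : public FastEmitterBase {
  unsigned NextVReg;
  ToyTargetEmitter() : NextVReg(1) {}
  virtual unsigned emit_rr(unsigned, unsigned) {
    return NextVReg++; // pretend a single instruction handled the operation
  }
};

// Caller-side convention, as in SelectBinaryOp: a zero result halts "fast"
// selection for this instruction.
bool trySelect(FastEmitterBase &E, unsigned A, unsigned B, unsigned &Out) {
  Out = E.emit_rr(A, B);
  return Out != 0;
}
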
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm); + if (ResultReg != 0) + return ResultReg; + unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); + if (MaterialReg == 0) + return 0; + return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); +} + +/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries +/// to emit an instruction with a floating-point immediate operand using +/// FastEmit_rf. If that fails, it materializes the immediate into a register +/// and try FastEmit_rr instead. +unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode, + unsigned Op0, ConstantFP *FPImm, + MVT::SimpleValueType ImmType) { + // First check if immediate type is legal. If not, we can't use the rf form. + unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); + if (ResultReg != 0) + return ResultReg; + + // Materialize the constant in a register. + unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm); + if (MaterialReg == 0) { + // If the target doesn't have a way to directly enter a floating-point + // value into a register, use an alternate approach. + // TODO: The current approach only supports floating-point constants + // that can be constructed by conversion from integer values. This should + // be replaced by code that creates a load from a constant-pool entry, + // which will require some target-specific work. + const APFloat &Flt = FPImm->getValueAPF(); + MVT IntVT = TLI.getPointerTy(); + + uint64_t x[2]; + uint32_t IntBitWidth = IntVT.getSizeInBits(); + bool isExact; + (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); + if (!isExact) + return 0; + APInt IntVal(IntBitWidth, 2, x); + + unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::Constant, IntVal.getZExtValue()); + if (IntegerReg == 0) + return 0; + MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, + ISD::SINT_TO_FP, IntegerReg); + if (MaterialReg == 0) + return 0; + } + return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); +} + +unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { + return MRI.createVirtualRegister(RC); +} + +unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass* RC) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + BuildMI(MBB, DL, II, ResultReg); + return ResultReg; +} + +unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0); + else { + BuildMI(MBB, DL, II).addReg(Op0); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, unsigned Op1) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1); + else { + BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned 
FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, unsigned Op1, uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addImm(Imm); + else { + BuildMI(MBB, DL, II).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, + unsigned Op0, uint32_t Idx) { + const TargetRegisterClass* RC = MRI.getRegClass(Op0); + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::EXTRACT_SUBREG); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx); + else { + BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op +/// with all but the least significant bit set to zero. +unsigned FastISel::FastEmitZExtFromI1(MVT::SimpleValueType VT, unsigned Op) { + return FastEmit_ri(VT, VT, ISD::AND, Op, 1); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp new file mode 100644 index 000000000000..2cd67e61907f --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -0,0 +1,3091 @@ +//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge the setcc and branch into a single brcc.
+///
+namespace {
+class VISIBILITY_HIDDEN SelectionDAGLegalize {
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+ CodeGenOpt::Level OptLevel;
+
+ // Libcall insertion helpers.
+
+ /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+ /// legalized. We use this to ensure that calls are properly serialized
+ /// against each other, including inserted libcalls.
+ SDValue LastCALLSEQ_END;
+
+ /// IsLegalizingCall - This member is used *only* for purposes of providing
+ /// helpful assertions that a libcall isn't created while another call is
+ /// being legalized (which could lead to non-serialized call sequences).
+ bool IsLegalizingCall;
+
+ enum LegalizeAction {
+ Legal, // The target natively supports this operation.
+ Promote, // This operation should be executed in a larger type.
+ Expand // Try to expand this to other ops, otherwise use a libcall.
+ };
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// value type, where the two bits correspond to the LegalizeAction enum.
+ /// This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+public:
+ SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+
+ /// getTypeAction - Return how we should legalize values of this type, either
+ /// it is already legal or we need to expand it into multiple registers of
+ /// smaller integer type, or we need to promote it to a larger type.
+ LegalizeAction getTypeAction(MVT VT) const {
+ return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ ///
+ bool isTypeLegal(MVT VT) const {
+ return getTypeAction(VT) == Legal;
+ }
+
+ void LegalizeDAG();
+
+private:
+ /// LegalizeOp - We know that the specified value has a legal type.
+ /// Recursively ensure that the operands have legal types, then return the
+ /// result.
+ SDValue LegalizeOp(SDValue O);
+
+ /// PerformInsertVectorEltInMemory - Some target cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+
+ /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const;
+
+ bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+ void LegalizeSetCCCondCode(MVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ DebugLoc dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+
+ SDValue EmitStackConvert(SDValue SrcOp, MVT SlotVT, MVT DestVT, DebugLoc dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ SDValue ExpandDBG_STOPPOINT(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, MVT DestVT,
+ DebugLoc dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, MVT DestVT, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, MVT DestVT, bool isSigned,
+ DebugLoc dl);
+
+ SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+
+ void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const {
+ MVT EltVT = NVT.getVectorElementType();
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
+ CodeGenOpt::Level ol)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
+ LastCALLSEQ_END = DAG.getEntryNode();
+ IsLegalizingCall = false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). 
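
The mask rewrite in ShuffleWithNarrowerEltType is pure index arithmetic: each original index Idx expands into NumEltsGrowth consecutive narrow indices Idx*NumEltsGrowth+j, and a negative (undef) index stays -1 for every widened slot. A standalone sketch that reproduces the <0, 1, 0, 1> -> <0, 1, 2, 3, 0, 1, 2, 3> example from the comment:

#include <cstdio>
#include <vector>

// Widen a shuffle mask when each wide element is split into Growth narrow ones.
std::vector<int> widenMask(const std::vector<int> &Mask, unsigned Growth) {
  std::vector<int> NewMask;
  for (unsigned i = 0, e = Mask.size(); i != e; ++i)
    for (unsigned j = 0; j != Growth; ++j)
      NewMask.push_back(Mask[i] < 0 ? -1 : Mask[i] * (int)Growth + (int)j);
  return NewMask;
}

int main() {
  const int M[] = {0, 1, 0, 1};                        // v4i32 mask
  std::vector<int> Wide = widenMask(std::vector<int>(M, M + 4), 2);
  for (unsigned i = 0; i != Wide.size(); ++i)
    std::printf("%d ", Wide[i]);                       // prints: 0 1 2 3 0 1 2 3
  std::printf("\n");
}
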
Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != next(E); ++I) + LegalizeOp(SDValue(I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); +} + + +/// FindCallEndFromCallStart - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_END node that terminates the call sequence. +static SDNode *FindCallEndFromCallStart(SDNode *Node) { + if (Node->getOpcode() == ISD::CALLSEQ_END) + return Node; + if (Node->use_empty()) + return 0; // No CallSeqEnd + + // The chain is usually at the end. + SDValue TheChain(Node, Node->getNumValues()-1); + if (TheChain.getValueType() != MVT::Other) { + // Sometimes it's at the beginning. + TheChain = SDValue(Node, 0); + if (TheChain.getValueType() != MVT::Other) { + // Otherwise, hunt for it. + for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) + if (Node->getValueType(i) == MVT::Other) { + TheChain = SDValue(Node, i); + break; + } + + // Otherwise, we walked into a node without a chain. + if (TheChain.getValueType() != MVT::Other) + return 0; + } + } + + for (SDNode::use_iterator UI = Node->use_begin(), + E = Node->use_end(); UI != E; ++UI) { + + // Make sure to only follow users of our token chain. + SDNode *User = *UI; + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) + if (User->getOperand(i) == TheChain) + if (SDNode *Result = FindCallEndFromCallStart(User)) + return Result; + } + return 0; +} + +/// FindCallStartFromCallEnd - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_START node that initiates the call sequence. +static SDNode *FindCallStartFromCallEnd(SDNode *Node) { + assert(Node && "Didn't find callseq_start for a call??"); + if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; + + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Node doesn't have a token chain argument!"); + return FindCallStartFromCallEnd(Node->getOperand(0).getNode()); +} + +/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to +/// see if any uses can reach Dest. If no dest operands can get to dest, +/// legalize them, legalize ourself, and return false, otherwise, return true. +/// +/// Keep track of the nodes we fine that actually do lead to Dest in +/// NodesLeadingTo. This avoids retraversing them exponential number of times. +/// +bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, + SmallPtrSet<SDNode*, 32> &NodesLeadingTo) { + if (N == Dest) return true; // N certainly leads to Dest :) + + // If we've already processed this node and it does lead to Dest, there is no + // need to reprocess it. + if (NodesLeadingTo.count(N)) return true; + + // If the first result of this node has been already legalized, then it cannot + // reach N. + if (LegalizedNodes.count(SDValue(N, 0))) return false; + + // Okay, this node has not already been legalized. 
Check and legalize all + // operands. If none lead to Dest, then we can legalize this node. + bool OperandsLeadToDest = false; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + OperandsLeadToDest |= // If an operand leads to Dest, so do we. + LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo); + + if (OperandsLeadToDest) { + NodesLeadingTo.insert(N); + return true; + } + + // Okay, this node looks safe, legalize it and return false. + LegalizeOp(SDValue(N, 0)); + return false; +} + +/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or +/// a load from the constant pool. +static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, + SelectionDAG &DAG, const TargetLowering &TLI) { + bool Extend = false; + DebugLoc dl = CFP->getDebugLoc(); + + // If a FP immediate is precise when represented as a float and if the + // target can do an extending load from float to double, we put it into + // the constant pool as a float, even if it's is statically typed as a + // double. This shrinks FP constants and canonicalizes them for targets where + // an FP extending load is the same cost as a normal load (such as on the x87 + // fp stack or PPC FP unit). + MVT VT = CFP->getValueType(0); + ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue()); + if (!UseCP) { + assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); + return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), + (VT == MVT::f64) ? MVT::i64 : MVT::i32); + } + + MVT OrigVT = VT; + MVT SVT = VT; + while (SVT != MVT::f32) { + SVT = (MVT::SimpleValueType)(SVT.getSimpleVT() - 1); + if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) && + // Only do this if the target has a native EXTLOAD instruction from + // smaller type. + TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && + TLI.ShouldShrinkFPConstant(OrigVT)) { + const Type *SType = SVT.getTypeForMVT(); + LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); + VT = SVT; + Extend = true; + } + } + + SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + if (Extend) + return DAG.getExtLoad(ISD::EXTLOAD, dl, + OrigVT, DAG.getEntryNode(), + CPIdx, PseudoSourceValue::getConstantPool(), + 0, VT, false, Alignment); + return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, false, Alignment); +} + +/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. +static +SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI) { + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDValue Val = ST->getValue(); + MVT VT = Val.getValueType(); + int Alignment = ST->getAlignment(); + int SVOffset = ST->getSrcValueOffset(); + DebugLoc dl = ST->getDebugLoc(); + if (ST->getMemoryVT().isFloatingPoint() || + ST->getMemoryVT().isVector()) { + MVT intVT = MVT::getIntegerVT(VT.getSizeInBits()); + if (TLI.isTypeLegal(intVT)) { + // Expand to a bitconvert of the value to the integer type of the + // same size, then a (misaligned) int store. + // FIXME: Does not handle truncating floating point stores! 
+ SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); + return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), + SVOffset, ST->isVolatile(), Alignment); + } else { + // Do a (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. + MVT StoredVT = ST->getMemoryVT(); + MVT RegVT = + TLI.getRegisterType(MVT::getIntegerVT(StoredVT.getSizeInBits())); + unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + + // Perform the original store, only redirected to the stack slot. + SDValue Store = DAG.getTruncStore(Chain, dl, + Val, StackPtr, NULL, 0, StoredVT); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the stack slot. + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0); + // Store it to the final location. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, + ST->getSrcValue(), SVOffset + Offset, + ST->isVolatile(), + MinAlign(ST->getAlignment(), Offset))); + // Increment the pointers. + Offset += RegBytes; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + } + + // The last store may be partial. Do a truncating store. On big-endian + // machines this requires an extending load from the stack slot to ensure + // that the bits are in the right place. + MVT MemVT = MVT::getIntegerVT(8 * (StoredBytes - Offset)); + + // Load from the stack slot. + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + NULL, 0, MemVT); + + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, + ST->getSrcValue(), SVOffset + Offset, + MemVT, ST->isVolatile(), + MinAlign(ST->getAlignment(), Offset))); + // The order of the stores doesn't matter - say it with a TokenFactor. + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + } + } + assert(ST->getMemoryVT().isInteger() && + !ST->getMemoryVT().isVector() && + "Unaligned store of unknown type."); + // Get the half-size VT + MVT NewStoredVT = + (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT() - 1); + int NumBits = NewStoredVT.getSizeInBits(); + int IncrementSize = NumBits / 8; + + // Divide the stored value in two parts. 
+ SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy()); + SDValue Lo = Val; + SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); + + // Store the two parts + SDValue Store1, Store2; + Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, + ST->getSrcValue(), SVOffset, NewStoredVT, + ST->isVolatile(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Alignment = MinAlign(Alignment, IncrementSize); + Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, + ST->getSrcValue(), SVOffset + IncrementSize, + NewStoredVT, ST->isVolatile(), Alignment); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); +} + +/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. +static +SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI) { + int SVOffset = LD->getSrcValueOffset(); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + MVT VT = LD->getValueType(0); + MVT LoadedVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + if (VT.isFloatingPoint() || VT.isVector()) { + MVT intVT = MVT::getIntegerVT(LoadedVT.getSizeInBits()); + if (TLI.isTypeLegal(intVT)) { + // Expand to a (misaligned) integer load of the same size, + // then bitconvert to floating point or vector. + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset, LD->isVolatile(), + LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); + if (VT.isFloatingPoint() && LoadedVT != VT) + Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); + + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, dl); + } else { + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. + MVT RegVT = TLI.getRegisterType(intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset + Offset, LD->isVolatile(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + NULL, 0)); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + } + + // The last copy may be partial. Do an extending load. + MVT MemVT = MVT::getIntegerVT(8 * (LoadedBytes - Offset)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getSrcValue(), SVOffset + Offset, + MemVT, LD->isVolatile(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. 
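
For the integer path just emitted, the split is plain arithmetic: the low half of the value is stored at the original address, the value shifted right by half its width is stored at the address plus IncrementSize, and the two halves are swapped on big-endian targets. A standalone little-endian sketch of a 32-bit store broken into two 16-bit stores:

#include <cassert>
#include <cstdint>
#include <cstring>

// Sketch of the Lo/Hi split above for a 32-bit value and a 2-byte-aligned
// pointer (little-endian order; a big-endian target would swap the halves).
void storeUnaligned32(uint8_t *Ptr, uint32_t Val) {
  uint16_t Lo = (uint16_t)Val;         // truncating store at Ptr
  uint16_t Hi = (uint16_t)(Val >> 16); // SRL by NumBits, store at Ptr + 2
  std::memcpy(Ptr, &Lo, 2);
  std::memcpy(Ptr + 2, &Hi, 2);
}

int main() {
  uint8_t Buf[6] = {0};
  storeUnaligned32(Buf + 1, 0xAABBCCDDu); // deliberately misaligned address
  uint16_t Lo, Hi;
  std::memcpy(&Lo, Buf + 1, 2);
  std::memcpy(&Hi, Buf + 3, 2);
  assert(((uint32_t(Hi) << 16) | Lo) == 0xAABBCCDDu);
}
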
+ // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + NULL, 0, MemVT)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + NULL, 0, LoadedVT); + + // Callers expect a MERGE_VALUES node. + SDValue Ops[] = { Load, TF }; + return DAG.getMergeValues(Ops, 2, dl); + } + } + assert(LoadedVT.isInteger() && !LoadedVT.isVector() && + "Unaligned load of unsupported type."); + + // Compute the new VT that is half the size of the old one. This is an + // integer MVT. + unsigned NumBits = LoadedVT.getSizeInBits(); + MVT NewLoadedVT; + NewLoadedVT = MVT::getIntegerVT(NumBits/2); + NumBits >>= 1; + + unsigned Alignment = LD->getAlignment(); + unsigned IncrementSize = NumBits / 8; + ISD::LoadExtType HiExtType = LD->getExtensionType(); + + // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. + if (HiExtType == ISD::NON_EXTLOAD) + HiExtType = ISD::ZEXTLOAD; + + // Load the value in two parts + SDValue Lo, Hi; + if (TLI.isLittleEndian()) { + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), + MinAlign(Alignment, IncrementSize)); + } else { + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), + MinAlign(Alignment, IncrementSize)); + } + + // aggregate the two parts + SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy()); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); + Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + SDValue Ops[] = { Result, TF }; + return DAG.getMergeValues(Ops, 2, dl); +} + +/// PerformInsertVectorEltInMemory - Some target cannot handle a variable +/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it +/// is necessary to spill the vector being inserted into to memory, perform +/// the insert there, and then read the result back. +SDValue SelectionDAGLegalize:: +PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, + DebugLoc dl) { + SDValue Tmp1 = Vec; + SDValue Tmp2 = Val; + SDValue Tmp3 = Idx; + + // If the target doesn't support this, we have to spill the input vector + // to a temporary stack slot, update the element, then reload it. This is + // badness. We could also load the value into a vector register (either + // with a "move to register" or "extload into register" instruction, then + // permute it into place, if the idx is a constant and if the idx is + // supported by the target. 
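
One detail worth calling out in the recombination above is that the low half is always loaded with ISD::ZEXTLOAD: if the low half were sign-extended, the OR would smear its sign bit across the bits the high half is supposed to supply. A small standalone check of that point:

#include <cassert>
#include <cstdint>

// Result = (Hi << NumBits) | Lo, as in ExpandUnalignedLoad, with NumBits = 16.
uint32_t combine(uint16_t Hi, uint32_t LoExtended) {
  return (uint32_t(Hi) << 16) | LoExtended;
}

int main() {
  uint16_t Hi = 0xAABB, Lo = 0xCCDD;
  assert(combine(Hi, (uint32_t)Lo) == 0xAABBCCDDu);                    // zero-extended low half: correct
  assert(combine(Hi, (uint32_t)(int32_t)(int16_t)Lo) != 0xAABBCCDDu);  // sign-extended low half: corrupted
}
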
+ MVT VT = Tmp1.getValueType(); + MVT EltVT = VT.getVectorElementType(); + MVT IdxVT = Tmp3.getValueType(); + MVT PtrVT = TLI.getPointerTy(); + SDValue StackPtr = DAG.CreateStackTemporary(VT); + + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + + // Store the vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, + PseudoSourceValue::getFixedStack(SPFI), 0); + + // Truncate or zero extend offset to target pointer type. + unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); + // Add the offset to the index. + unsigned EltSize = EltVT.getSizeInBits()/8; + Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); + SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); + // Store the scalar value. + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, + PseudoSourceValue::getFixedStack(SPFI), 0, EltVT); + // Load the updated vector. + return DAG.getLoad(VT, dl, Ch, StackPtr, + PseudoSourceValue::getFixedStack(SPFI), 0); +} + + +SDValue SelectionDAGLegalize:: +ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { + if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { + // SCALAR_TO_VECTOR requires that the type of the value being inserted + // match the element type of the vector being created, except for + // integers in which case the inserted value can be over width. + MVT EltVT = Vec.getValueType().getVectorElementType(); + if (Val.getValueType() == EltVT || + (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { + SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + Vec.getValueType(), Val); + + unsigned NumElts = Vec.getValueType().getVectorNumElements(); + // We generate a shuffle of InVec and ScVec, so the shuffle mask + // should be 0,1,2,3,4,5... with the appropriate element replaced with + // elt 0 of the RHS. + SmallVector<int, 8> ShufOps; + for (unsigned i = 0; i != NumElts; ++i) + ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); + + return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, + &ShufOps[0]); + } + } + return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); +} + +/// LegalizeOp - We know that the specified value has a legal type, and +/// that its operands are legal. Now ensure that the operation itself +/// is legal, recursively ensuring that the operands' operations remain +/// legal. +SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { + if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + return Op; + + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + assert(getTypeAction(Node->getValueType(i)) == Legal && + "Unexpected illegal type!"); + + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + assert((isTypeLegal(Node->getOperand(i).getValueType()) || + Node->getOperand(i).getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. 
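
PerformInsertVectorEltInMemory above is the memory fallback in miniature: when the insertion index is not a constant, the whole vector is spilled to a stack slot, the one element at base + Idx * EltSize is overwritten, and the vector is loaded back. A standalone sketch over a hypothetical 4 x float value (the index is masked here only to keep the sketch in bounds; the real code uses a truncating store for the scalar):

#include <cassert>
#include <cstring>

struct V4 { float Lane[4]; }; // stand-in for a v4f32 value

V4 insertElt(V4 Vec, float Val, unsigned Idx) {
  float Slot[4];
  std::memcpy(Slot, Vec.Lane, sizeof(Slot)); // store the vector to a stack slot
  Slot[Idx & 3] = Val;                       // overwrite one lane at Base + Idx*EltSize
  V4 Out;
  std::memcpy(Out.Lane, Slot, sizeof(Slot)); // load the updated vector back
  return Out;
}

int main() {
  V4 V = { {1.0f, 2.0f, 3.0f, 4.0f} };
  V4 R = insertElt(V, 9.0f, 2);
  assert(R.Lane[2] == 9.0f && R.Lane[0] == 1.0f);
}
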
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + SDValue Result = Op; + bool isCustom = false; + + // Figure out the correct action; the way to query this varies by opcode + TargetLowering::LegalizeAction Action; + bool SimpleFinishLegalizing = true; + switch (Node->getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::VAARG: + case ISD::STACKSAVE: + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::EXTRACT_VECTOR_ELT: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: { + MVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Action = TLI.getOperationAction(Node->getOpcode(), InnerType); + break; + } + case ISD::SELECT_CC: + case ISD::SETCC: + case ISD::BR_CC: { + unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::SETCC ? 2 : 1; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + MVT OpVT = Node->getOperand(CompareOperand).getValueType(); + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); + Action = TLI.getCondCodeAction(CCCode, OpVT); + if (Action == TargetLowering::Legal) { + if (Node->getOpcode() == ISD::SELECT_CC) + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + else + Action = TLI.getOperationAction(Node->getOpcode(), OpVT); + } + break; + } + case ISD::LOAD: + case ISD::STORE: + // FIXME: Model these properly. LOAD and STORE are complicated, and + // STORE expects the unlegalized operand in some cases. + SimpleFinishLegalizing = false; + break; + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: + // FIXME: This shouldn't be necessary. These nodes have special properties + // dealing with the recursive nature of legalization. Removing this + // special case should be done as part of making LegalizeDAG non-recursive. + SimpleFinishLegalizing = false; + break; + case ISD::CALL: + // FIXME: Legalization for calls requires custom-lowering the call before + // legalizing the operands! (I haven't looked into precisely why.) + SimpleFinishLegalizing = false; + break; + case ISD::EXTRACT_ELEMENT: + case ISD::FLT_ROUNDS_: + case ISD::SADDO: + case ISD::SSUBO: + case ISD::UADDO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + case ISD::FPOWI: + case ISD::MERGE_VALUES: + case ISD::EH_RETURN: + case ISD::FRAME_TO_ARGS_OFFSET: + // These operations lie about being legal: when they claim to be legal, + // they should actually be expanded. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + break; + case ISD::TRAMPOLINE: + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FORMAL_ARGUMENTS: + // These operations lie about being legal: when they claim to be legal, + // they should actually be custom-lowered. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Custom; + break; + case ISD::BUILD_VECTOR: + // A weird case: legalization for BUILD_VECTOR never legalizes the + // operands! + // FIXME: This really sucks... 
changing it isn't semantically incorrect, + // but it massively pessimizes the code for floating-point BUILD_VECTORs + // because ConstantFP operands get legalized into constant pool loads + // before the BUILD_VECTOR code can see them. It doesn't usually bite, + // though, because BUILD_VECTORS usually get lowered into other nodes + // which get legalized properly. + SimpleFinishLegalizing = false; + break; + default: + if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { + Action = TargetLowering::Legal; + } else { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + } + break; + } + + if (SimpleFinishLegalizing) { + SmallVector<SDValue, 8> Ops, ResultVals; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + switch (Node->getOpcode()) { + default: break; + case ISD::BR: + case ISD::BRIND: + case ISD::BR_JT: + case ISD::BR_CC: + case ISD::BRCOND: + case ISD::RET: + // Branches tweak the chain to include LastCALLSEQ_END + Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], + LastCALLSEQ_END); + Ops[0] = LegalizeOp(Ops[0]); + LastCALLSEQ_END = DAG.getEntryNode(); + break; + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + case ISD::ROTR: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Ops[1].getValueType().isVector()) + Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1])); + break; + } + + Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(), + Ops.size()); + switch (Action) { + case TargetLowering::Legal: + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Result.getValue(i)); + break; + case TargetLowering::Custom: + // FIXME: The handling for custom lowering with multiple results is + // a complete mess. + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.getNode()) { + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { + if (e == 1) + ResultVals.push_back(Tmp1); + else + ResultVals.push_back(Tmp1.getValue(i)); + } + break; + } + + // FALL THROUGH + case TargetLowering::Expand: + ExpandNode(Result.getNode(), ResultVals); + break; + case TargetLowering::Promote: + PromoteNode(Result.getNode(), ResultVals); + break; + } + if (!ResultVals.empty()) { + for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { + if (ResultVals[i] != SDValue(Node, i)) + ResultVals[i] = LegalizeOp(ResultVals[i]); + AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); + } + return ResultVals[Op.getResNo()]; + } + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "NODE: "; Node->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to legalize this operator!"); + abort(); + case ISD::CALL: + // The only option for this is to custom lower it. + Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG); + assert(Tmp3.getNode() && "Target didn't custom lower this node!"); + // A call within a calling sequence must be legalized to something + // other than the normal CALLSEQ_END. Violating this gets Legalize + // into an infinite loop. + assert ((!IsLegalizingCall || + Node->getOpcode() != ISD::CALL || + Tmp3.getNode()->getOpcode() != ISD::CALLSEQ_END) && + "Nested CALLSEQ_START..CALLSEQ_END not supported."); + + // The number of incoming and outgoing values should match; unless the final + // outgoing value is a flag. 
+ assert((Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() || + (Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() + 1 && + Tmp3.getNode()->getValueType(Tmp3.getNode()->getNumValues() - 1) == + MVT::Flag)) && + "Lowering call/formal_arguments produced unexpected # results!"); + + // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to + // remember that we legalized all of them, so it doesn't get relegalized. + for (unsigned i = 0, e = Tmp3.getNode()->getNumValues(); i != e; ++i) { + if (Tmp3.getNode()->getValueType(i) == MVT::Flag) + continue; + Tmp1 = LegalizeOp(Tmp3.getValue(i)); + if (Op.getResNo() == i) + Tmp2 = Tmp1; + AddLegalizedOperand(SDValue(Node, i), Tmp1); + } + return Tmp2; + case ISD::BUILD_VECTOR: + switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Result = Tmp3; + break; + } + // FALLTHROUGH + case TargetLowering::Expand: + Result = ExpandBUILD_VECTOR(Result.getNode()); + break; + } + break; + case ISD::CALLSEQ_START: { + SDNode *CallEnd = FindCallEndFromCallStart(Node); + + // Recursively Legalize all of the inputs of the call end that do not lead + // to this call start. This ensures that any libcalls that need be inserted + // are inserted *before* the CALLSEQ_START. + {SmallPtrSet<SDNode*, 32> NodesLeadingTo; + for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) + LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, + NodesLeadingTo); + } + + // Now that we legalized all of the inputs (which may have inserted + // libcalls) create the new CALLSEQ_START node. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + + // Merge in the last call, to ensure that this call start after the last + // call ended. + if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + } + + // Do not try to legalize the target-specific arguments (#1+). + if (Tmp1 != Node->getOperand(0)) { + SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + + // Remember that the CALLSEQ_START is legalized. + AddLegalizedOperand(Op.getValue(0), Result); + if (Node->getNumValues() == 2) // If this has a flag result, remember it. + AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); + + // Now that the callseq_start and all of the non-call nodes above this call + // sequence have been legalized, legalize the call itself. During this + // process, no libcalls can/will be inserted, guaranteeing that no calls + // can overlap. + assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); + // Note that we are selecting this call! + LastCALLSEQ_END = SDValue(CallEnd, 0); + IsLegalizingCall = true; + + // Legalize the call, starting from the CALLSEQ_END. + LegalizeOp(LastCALLSEQ_END); + assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + return Result; + } + case ISD::CALLSEQ_END: + // If the CALLSEQ_START node hasn't been legalized first, legalize it. This + // will cause this node to be legalized as well as handling libcalls right. 
+ if (LastCALLSEQ_END.getNode() != Node) { + LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + // Otherwise, the call start has been legalized and everything is going + // according to plan. Just legalize ourselves normally here. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + // Do not try to legalize the target-specific arguments (#1+), except for + // an optional flag input. + if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){ + if (Tmp1 != Node->getOperand(0)) { + SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + } else { + Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); + if (Tmp1 != Node->getOperand(0) || + Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { + SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Ops.back() = Tmp2; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + } + assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); + // This finishes up call legalization. + IsLegalizingCall = false; + + // If the CALLSEQ_END node has a flag, remember that we legalized it. + AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); + if (Node->getNumValues() == 2) + AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); + return Result.getValue(Op.getResNo()); + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Node); + Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. + + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) { + MVT VT = Node->getValueType(0); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Tmp3 = Result.getValue(0); + Tmp4 = Result.getValue(1); + + switch (TLI.getOperationAction(Node->getOpcode(), VT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, + TLI); + Tmp3 = Result.getOperand(0); + Tmp4 = Result.getOperand(1); + Tmp3 = LegalizeOp(Tmp3); + Tmp4 = LegalizeOp(Tmp4); + } + } + break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Tmp3, DAG); + if (Tmp1.getNode()) { + Tmp3 = LegalizeOp(Tmp1); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); + } + break; + case TargetLowering::Promote: { + // Only promote a load of vector type to another. + assert(VT.isVector() && "Cannot promote this load!"); + // Change base type to a different vector type. + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + + Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); + Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); + break; + } + } + // Since loads produce two values, make sure to remember that we + // legalized both of them. 
+ AddLegalizedOperand(SDValue(Node, 0), Tmp3); + AddLegalizedOperand(SDValue(Node, 1), Tmp4); + return Op.getResNo() ? Tmp4 : Tmp3; + } else { + MVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + MVT NVT = MVT::getIntegerVT(NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getSrcValue(), SVOffset, + NVT, isVolatile, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(SrcVT.isExtended() && !SrcVT.isVector() && + "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + MVT RoundVT = MVT::getIntegerVT(RoundWidth); + MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, + Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset, RoundVT, isVolatile, + Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset + IncrementSize, + ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. 
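The TokenFactor, shift, and OR that combine the two partial loads continue immediately below. As a standalone check (not part of this change) of the little-endian arithmetic in the EXTLOAD:i24 example above, with arbitrary example bytes in memory:

#include <cstdint>
#include <cstdio>

int main() {
  unsigned char mem[3] = {0x78, 0x56, 0x34};   // an i24 value 0x345678 in memory
  std::uint32_t lo = mem[0] | (mem[1] << 8);   // ZEXTLOAD:i16  (RoundWidth = 16)
  std::uint32_t hi = mem[2];                   // EXTLOAD@+2:i8 (ExtraWidth = 8)
  std::uint32_t val = lo | (hi << 16);         // shl Hi, RoundWidth; or Lo, Hi
  std::printf("0x%06x\n", (unsigned)val);      // prints: 0x345678
  return 0;
}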
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset, RoundVT, isVolatile, + Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, + Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset + IncrementSize, + ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else { + switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); + + if (isCustom) { + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); + } + } else { + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, + TLI); + Tmp1 = Result.getOperand(0); + Tmp2 = Result.getOperand(1); + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); + } + } + } + break; + case TargetLowering::Expand: + // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND + if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) { + SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); + Result = DAG.getNode(ISD::FP_EXTEND, dl, + Node->getValueType(0), Load); + Tmp1 = LegalizeOp(Result); // Relegalize new nodes. + Tmp2 = LegalizeOp(Load.getValue(1)); + break; + } + assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. 
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), SrcVT, + LD->isVolatile(), LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT); + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + break; + } + } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + AddLegalizedOperand(SDValue(Node, 0), Tmp1); + AddLegalizedOperand(SDValue(Node, 1), Tmp2); + return Op.getResNo() ? Tmp2 : Tmp1; + } + } + case ISD::STORE: { + StoreSDNode *ST = cast<StoreSDNode>(Node); + Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + + if (!ST->isTruncatingStore()) { + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // FIXME: We shouldn't do this for TargetConstantFP's. + // FIXME: move this to the DAG Combiner! Note that we can't regress due + // to phase ordering between legalized code and the dag combiner. This + // probably means that we need to integrate dag combiner and legalizer + // together. + // We generally can't do this one for long doubles. + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { + if (CFP->getValueType(0) == MVT::f32 && + getTypeAction(MVT::i32) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF(). + bitcastToAPInt().zextOrTrunc(32), + MVT::i32); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + break; + } else if (CFP->getValueType(0) == MVT::f64) { + // If this target supports 64-bit registers, do a single 64-bit store. + if (getTypeAction(MVT::i64) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + zextOrTrunc(64), MVT::i64); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + break; + } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + // Otherwise, if the target supports 32-bit registers, use 2 32-bit + // stores. If the target supports neither 32- nor 64-bits, this + // xform is certainly not worth it. + const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(4)); + Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, + isVolatile, MinAlign(Alignment, 4U)); + + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + break; + } + } + } + + { + Tmp3 = LegalizeOp(ST->getValue()); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, + ST->getOffset()); + + MVT VT = Tmp3.getValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. 
+ if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (ST->getAlignment() < ABIAlignment) + Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, + TLI); + } + break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.getNode()) Result = Tmp1; + break; + case TargetLowering::Promote: + assert(VT.isVector() && "Unknown legal promote case!"); + Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl, + TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getSrcValue(), SVOffset, isVolatile, + Alignment); + break; + } + break; + } + } else { + Tmp3 = LegalizeOp(ST->getValue()); + + MVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + MVT NVT = MVT::getIntegerVT(StVT.getStoreSizeInBits()); + Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); + Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, NVT, isVolatile, Alignment); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. + assert(StVT.isExtended() && !StVT.isVector() && + "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + MVT RoundVT = MVT::getIntegerVT(RoundWidth); + MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, RoundVT, + isVolatile, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), + SVOffset + IncrementSize, ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + } else { + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, + DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), + SVOffset, RoundVT, isVolatile, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset + IncrementSize, ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + } + + // The order of the stores doesn't matter. 
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } else { + if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || + Tmp2 != ST->getBasePtr()) + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, + ST->getOffset()); + + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (ST->getAlignment() < ABIAlignment) + Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, + TLI); + } + break; + case TargetLowering::Custom: + Result = TLI.LowerOperation(Result, DAG); + break; + case Expand: + // TRUNCSTORE:i16 i32 -> STORE i16 + assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); + Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + break; + } + } + } + break; + } + } + assert(Result.getValueType() == Op.getValueType() && + "Bad legalization!"); + + // Make sure that the generated code is itself legal. + if (Result != Op) + Result = LegalizeOp(Result); + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; +} + +SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { + SDValue Vec = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + // Store the value to a temporary stack slot, then LOAD the returned part. + SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + + // Add the offset to the index. + unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, Idx.getValueType())); + + if (Idx.getValueType().bitsGT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); + else + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); +} + +SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + assert((Tmp2.getValueType() == MVT::f32 || + Tmp2.getValueType() == MVT::f64) && + "Ugly special-cased code!"); + // Get the sign bit of the RHS. + SDValue SignBit; + MVT IVT = Tmp2.getValueType() == MVT::f64 ? 
MVT::i64 : MVT::i32; + if (isTypeLegal(IVT)) { + SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2); + } else { + assert(isTypeLegal(TLI.getPointerTy()) && + (TLI.getPointerTy() == MVT::i32 || + TLI.getPointerTy() == MVT::i64) && + "Legal type for load?!"); + SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType()); + SDValue StorePtr = StackPtr, LoadPtr = StackPtr; + SDValue Ch = + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0); + if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian()) + LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), + LoadPtr, DAG.getIntPtrConstant(4)); + SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(), + Ch, LoadPtr, NULL, 0, MVT::i32); + } + SignBit = + DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), + SignBit, DAG.getConstant(0, SignBit.getValueType()), + ISD::SETLT); + // Get the absolute value of the result. + SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); + // Select between the nabs and abs value based on the sign bit of + // the input. + return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit, + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); +} + +SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) { + DebugLoc dl = Node->getDebugLoc(); + DwarfWriter *DW = DAG.getDwarfWriter(); + bool useDEBUG_LOC = TLI.isOperationLegalOrCustom(ISD::DEBUG_LOC, + MVT::Other); + bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other); + + const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node); + GlobalVariable *CU_GV = cast<GlobalVariable>(DSP->getCompileUnit()); + if (DW && (useDEBUG_LOC || useLABEL) && !CU_GV->isDeclaration()) { + DICompileUnit CU(cast<GlobalVariable>(DSP->getCompileUnit())); + + unsigned Line = DSP->getLine(); + unsigned Col = DSP->getColumn(); + + if (OptLevel == CodeGenOpt::None) { + // A bit self-referential to have DebugLoc on Debug_Loc nodes, but it + // won't hurt anything. + if (useDEBUG_LOC) { + return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0), + DAG.getConstant(Line, MVT::i32), + DAG.getConstant(Col, MVT::i32), + DAG.getSrcValue(CU.getGV())); + } else { + unsigned ID = DW->RecordSourceLine(Line, Col, CU); + return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID); + } + } + } + return Node->getOperand(0); +} + +void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, + SmallVectorImpl<SDValue> &Results) { + unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" + " not tell us which reg is the stack pointer!"); + DebugLoc dl = Node->getDebugLoc(); + MVT VT = Node->getValueType(0); + SDValue Tmp1 = SDValue(Node, 0); + SDValue Tmp2 = SDValue(Node, 1); + SDValue Tmp3 = Node->getOperand(2); + SDValue Chain = Tmp1.getOperand(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. 
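The CALLSEQ_START/CALLSEQ_END bracketing of the dynamic stack allocation continues immediately below. As an aside, the ExpandFCOPYSIGN routine above reduces fcopysign to a sign-bit test on the second operand plus a select of |x| or -|x|; a standalone sketch of that reduction (illustrative only, example values assumed, and the sign test written as an unsigned shift rather than the signed compare against zero that the DAG code performs):

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

double copysign_expanded(double x, double y) {
  std::uint64_t bits;
  std::memcpy(&bits, &y, sizeof bits);   // BIT_CONVERT f64 -> i64 (or load via a slot)
  bool neg = (bits >> 63) != 0;          // the sign bit of the second operand
  double absval = std::fabs(x);          // FABS of the first operand
  return neg ? -absval : absval;         // SELECT(sign, FNEG(abs), abs)
}

int main() {
  std::printf("%g %g\n", copysign_expanded(3.5, -0.0), copysign_expanded(-2.0, 1.0));
  // prints: -3.5 2
  return 0;
}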
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); + + SDValue Size = Tmp2.getOperand(1); + SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); + Chain = SP.getValue(1); + unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); + unsigned StackAlign = + TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + if (Align > StackAlign) + SP = DAG.getNode(ISD::AND, dl, VT, SP, + DAG.getConstant(-(uint64_t)Align, VT)); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain + + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), + DAG.getIntPtrConstant(0, true), SDValue()); + + Results.push_back(Tmp1); + Results.push_back(Tmp2); +} + +/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and +/// condition code CC on the current target. This routine assumes LHS and rHS +/// have already been legalized by LegalizeSetCCOperands. It expands SETCC with +/// illegal condition code into AND / OR of multiple SETCC values. +void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT, + SDValue &LHS, SDValue &RHS, + SDValue &CC, + DebugLoc dl) { + MVT OpVT = LHS.getValueType(); + ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + switch (TLI.getCondCodeAction(CCCode, OpVT)) { + default: assert(0 && "Unknown condition code action!"); + case TargetLowering::Legal: + // Nothing to do. + break; + case TargetLowering::Expand: { + ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; + unsigned Opc = 0; + switch (CCCode) { + default: assert(0 && "Don't know how to expand this condition!"); abort(); + case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break; + case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break; + case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break; + case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break; + case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break; + case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break; + // FIXME: Implement more expansions. + } + + SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); + SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); + RHS = SDValue(); + CC = SDValue(); + break; + } + } +} + +/// EmitStackConvert - Emit a store/load combination to the stack. This stores +/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does +/// a load from the stack slot to DestVT, extending it if needed. +/// The resultant code need not be legal. +SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, + MVT SlotVT, + MVT DestVT, + DebugLoc dl) { + // Create the stack frame object. + unsigned SrcAlign = + TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType(). 
+ getTypeForMVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+ unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ unsigned DestAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForMVT());
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // larger than SlotVT.
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ SV, 0, SlotVT, false, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ SV, 0, false, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
+ false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+ StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0,
+ Node->getValueType(0).getVectorElementType());
+ return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue SplatValue = Node->getOperand(0);
+ DebugLoc dl = Node->getDebugLoc();
+ MVT VT = Node->getValueType(0);
+ MVT OpVT = SplatValue.getValueType();
+ MVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+
+ // FIXME: it would be far nicer to change this into map<SDValue,uint64_t>
+ // and use a bitmask instead of a list of elements.
+ // FIXME: this doesn't treat <0, u, 0, u> for example, as a splat.
+ std::map<SDValue, std::vector<unsigned> > Values;
+ Values[SplatValue].push_back(0);
+ bool isConstant = true;
+ if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
+ SplatValue.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ Values[V].push_back(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ isOnlyLowElement = false;
+ if (SplatValue != V)
+ SplatValue = SDValue(0, 0);
+
+ // If this isn't a constant element or an undef, we can't use a constant
+ // pool load.
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
+ V.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+ }
+
+ if (isOnlyLowElement) {
+ // If the low element is an undef too, then this whole thing is an undef.
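The BUILD_VECTOR expansion continues immediately below. As an aside, the EmitStackConvert helper earlier in this hunk converts between types by storing a value with one type and reloading it with another; a standalone illustration (not part of this change), using an assumed f32/i32 pair:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float f = 1.0f;
  std::uint32_t bits;
  unsigned char slot[sizeof(float)];      // the "stack temporary"
  std::memcpy(slot, &f, sizeof f);        // store with the source type
  std::memcpy(&bits, slot, sizeof bits);  // reload with the destination type
  std::printf("0x%08x\n", (unsigned)bits); // prints: 0x3f800000
  return 0;
}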
+ if (Node->getOperand(0).getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + // Otherwise, turn this into a scalar_to_vector node. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0)); + } + + // If all elements are constants, create a load from the constant pool. + if (isConstant) { + std::vector<Constant*> CV; + for (unsigned i = 0, e = NumElems; i != e; ++i) { + if (ConstantFPSDNode *V = + dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { + CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue())); + } else if (ConstantSDNode *V = + dyn_cast<ConstantSDNode>(Node->getOperand(i))) { + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + } else { + assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); + const Type *OpNTy = OpVT.getTypeForMVT(); + CV.push_back(UndefValue::get(OpNTy)); + } + } + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment); + } + + if (SplatValue.getNode()) { // Splat of one value? + // Build the shuffle constant vector: <0, 0, 0, 0> + SmallVector<int, 8> ZeroVec(NumElems, 0); + + // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it. + if (TLI.isShuffleMaskLegal(ZeroVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. + SDValue LowValVec = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT), + &ZeroVec[0]); + } + } + + // If there are only two unique elements, we may be able to turn this into a + // vector shuffle. + if (Values.size() == 2) { + // Get the two values in deterministic order. + SDValue Val1 = Node->getOperand(1); + SDValue Val2; + std::map<SDValue, std::vector<unsigned> >::iterator MI = Values.begin(); + if (MI->first != Val1) + Val2 = MI->first; + else + Val2 = (++MI)->first; + + // If Val1 is an undef, make sure it ends up as Val2, to ensure that our + // vector shuffle has the undef vector on the RHS. + if (Val1.getOpcode() == ISD::UNDEF) + std::swap(Val1, Val2); + + // Build the shuffle constant vector: e.g. <0, 4, 0, 4> + SmallVector<int, 8> ShuffleMask(NumElems, -1); + + // Set elements of the shuffle mask for Val1. + std::vector<unsigned> &Val1Elts = Values[Val1]; + for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i) + ShuffleMask[Val1Elts[i]] = 0; + + // Set elements of the shuffle mask for Val2. + std::vector<unsigned> &Val2Elts = Values[Val2]; + for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i) + if (Val2.getOpcode() != ISD::UNDEF) + ShuffleMask[Val2Elts[i]] = NumElems; + + // If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it. + if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) && + TLI.isShuffleMaskLegal(ShuffleMask, VT)) { + Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1); + Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2); + return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]); + } + } + + // Otherwise, we can't handle this case efficiently. Allocate a sufficiently + // aligned object on the stack, store each element into it, then load + // the result as a vector. + // Create the stack frame object. 
+ SDValue FIPtr = DAG.CreateStackTemporary(VT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store of each element to the stack slot. + SmallVector<SDValue, 8> Stores; + unsigned TypeByteSize = OpVT.getSizeInBits() / 8; + // Store (in the right endianness) the elements to memory. + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + // Ignore undef elements. + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + unsigned Offset = TypeByteSize*i; + + SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); + + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), + Idx, SV, Offset)); + } + + SDValue StoreChain; + if (!Stores.empty()) // Not all undef elements? + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + else + StoreChain = DAG.getEntryNode(); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0); +} + +// ExpandLibCall - Expand a node into a call to a libcall. If the result value +// does not fit into a register, return the lo part and set the hi part to the +// by-reg argument. If it does fit into a single register, return the result +// and leave the Hi part unset. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, + bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + MVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForMVT(); + Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForMVT(); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + CallingConv::C, false, Callee, Args, DAG, + Node->getDebugLoc()); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). 
+ LegalizeOp(CallInfo.second); + return CallInfo.first; +} + +SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_PPCF128) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT()) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::f32: LC = Call_F32; break; + case MVT::f64: LC = Call_F64; break; + case MVT::f80: LC = Call_F80; break; + case MVT::ppcf128: LC = Call_PPCF128; break; + } + return ExpandLibCall(LC, Node, false); +} + +SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT()) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i16: LC = Call_I16; break; + case MVT::i32: LC = Call_I32; break; + case MVT::i64: LC = Call_I64; break; + case MVT::i128: LC = Call_I128; break; + } + return ExpandLibCall(LC, Node, isSigned); +} + +/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a +/// INT_TO_FP operation of the specified operand when the target requests that +/// we expand it. At this point, we know that the result and operand types are +/// legal for the target. +SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, + SDValue Op0, + MVT DestVT, + DebugLoc dl) { + if (Op0.getValueType() == MVT::i32) { + // simple 32-bit [signed|unsigned] integer to float/double expansion + + // Get the stack frame index of a 8 byte buffer. + SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); + + // word offset constant for Hi/Lo address computation + SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy()); + // set up Hi and Lo (into buffer) address based on endian + SDValue Hi = StackSlot; + SDValue Lo = DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), StackSlot, WordOff); + if (TLI.isLittleEndian()) + std::swap(Hi, Lo); + + // if signed map to unsigned space + SDValue Op0Mapped; + if (isSigned) { + // constant used to invert sign bit (signed to unsigned mapping) + SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32); + Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); + } else { + Op0Mapped = Op0; + } + // store the lo of the constructed double - based on integer input + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, + Op0Mapped, Lo, NULL, 0); + // initial hi portion of constructed double + SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); + // store the hi of the constructed double - biased exponent + SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0); + // load the constructed double + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0); + // FP constant to bias correct the final result + SDValue Bias = DAG.getConstantFP(isSigned ? 
+ BitsToDouble(0x4330000080000000ULL) : + BitsToDouble(0x4330000000000000ULL), + MVT::f64); + // subtract the bias + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); + // final result + SDValue Result; + // handle final rounding + if (DestVT == MVT::f64) { + // do nothing + Result = Sub; + } else if (DestVT.bitsLT(MVT::f64)) { + Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, + DAG.getIntPtrConstant(0)); + } else if (DestVT.bitsGT(MVT::f64)) { + Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); + } + return Result; + } + assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + + SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, Op0.getValueType()), + ISD::SETLT); + SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); + SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), + SignSet, Four, Zero); + + // If the sign bit of the integer is set, the large number will be treated + // as a negative number. To counteract this, the dynamic code adds an + // offset depending on the data type. + uint64_t FF; + switch (Op0.getValueType().getSimpleVT()) { + default: assert(0 && "Unsupported integer type!"); + case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) + case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) + case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) + case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) + } + if (TLI.isLittleEndian()) FF <<= 32; + Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF); + + SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset); + Alignment = std::min(Alignment, 4u); + SDValue FudgeInReg; + if (DestVT == MVT::f32) + FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment); + else { + FudgeInReg = + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + MVT::f32, false, Alignment)); + } + + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); +} + +/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a +/// *INT_TO_FP operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP +/// operation that takes a larger input. +SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, + MVT DestVT, + bool isSigned, + DebugLoc dl) { + // First step, figure out the appropriate *INT_TO_FP operation to use. + MVT NewInTy = LegalOp.getValueType(); + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (1) { + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); + assert(NewInTy.isInteger() && "Ran out of possibilities!"); + + // If the target supports SINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { + OpToUse = ISD::SINT_TO_FP; + break; + } + if (isSigned) continue; + + // If the target supports UINT_TO_FP of this type, use it. 
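The scan for a wider legal *INT_TO_FP operation continues immediately below. As an aside, the 32-bit path of ExpandLegalINT_TO_FP above builds a double whose high word is 0x43300000 and whose low word is the integer, then subtracts the bias BitsToDouble(0x4330000000000000ULL); a standalone check of that arithmetic (illustrative only, the packing done with memcpy instead of two word stores, example inputs assumed):

#include <cstdint>
#include <cstdio>
#include <cstring>

double u32_to_f64(std::uint32_t x) {
  // High word 0x43300000, low word x: the double's value is exactly 2^52 + x.
  std::uint64_t packed = 0x4330000000000000ULL | x;
  double d, bias;
  std::uint64_t biasbits = 0x4330000000000000ULL;  // same constant as the code
  std::memcpy(&d, &packed, sizeof d);
  std::memcpy(&bias, &biasbits, sizeof bias);
  return d - bias;                                 // FSUB removes the 2^52 bias
}

int main() {
  std::printf("%.0f %.0f\n", u32_to_f64(0), u32_to_f64(0xFFFFFFFFu));
  // prints: 0 4294967295
  return 0;
}

The signed variant in the code above flips the sign bit of the input first and subtracts BitsToDouble(0x4330000080000000ULL) instead.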
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { + OpToUse = ISD::UINT_TO_FP; + break; + } + + // Otherwise, try a larger type. + } + + // Okay, we found the operation and type to use. Zero extend our input to the + // desired type then run the operation on it. + return DAG.getNode(OpToUse, dl, DestVT, + DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp)); +} + +/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a +/// FP_TO_*INT operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT +/// operation that returns a larger result. +SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, + MVT DestVT, + bool isSigned, + DebugLoc dl) { + // First step, figure out the appropriate FP_TO*INT operation to use. + MVT NewOutTy = DestVT; + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (1) { + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT()+1); + assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { + OpToUse = ISD::FP_TO_SINT; + break; + } + + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { + OpToUse = ISD::FP_TO_UINT; + break; + } + + // Otherwise, try a larger type. + } + + + // Okay, we found the operation and type to use. + SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); + + // Truncate the result of the extended FP_TO_*INT operation to the desired + // size. + return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); +} + +/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. 
+/// +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { + MVT VT = Op.getValueType(); + MVT SHVT = TLI.getShiftAmountTy(); + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; + switch (VT.getSimpleVT()) { + default: assert(0 && "Unhandled Expand type in BSWAP!"); abort(); + case MVT::i16: + Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + case MVT::i32: + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT)); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + case MVT::i64: + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT)); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); + Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); + } +} + +/// ExpandBitCount - Expand the specified bitcount instruction into operations. +/// +SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, + DebugLoc dl) { + switch (Opc) { + default: assert(0 && "Cannot expand this yet!"); + case ISD::CTPOP: { + static const uint64_t mask[6] = { + 0x5555555555555555ULL, 0x3333333333333333ULL, + 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, + 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL + }; + MVT VT = Op.getValueType(); + MVT ShVT = TLI.getShiftAmountTy(); + unsigned len = VT.getSizeInBits(); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) + unsigned EltSize = VT.isVector() ? 
+ VT.getVectorElementType().getSizeInBits() : len; + SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT); + SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + Op = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::AND, dl, VT, Op, Tmp2), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3), + Tmp2)); + } + return Op; + } + case ISD::CTLZ: { + // for now, we do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + // + // but see also: http://www.hackersdelight.org/HDcode/nlz.cc + MVT VT = Op.getValueType(); + MVT ShVT = TLI.getShiftAmountTy(); + unsigned len = VT.getSizeInBits(); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + Op = DAG.getNode(ISD::OR, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); + } + Op = DAG.getNOT(dl, Op, VT); + return DAG.getNode(ISD::CTPOP, dl, VT, Op); + } + case ISD::CTTZ: { + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // see also http://www.hackersdelight.org/HDcode/ntz.cc + MVT VT = Op.getValueType(); + SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNOT(dl, Op, VT), + DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getConstant(1, VT))); + // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. + if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) + return DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(VT.getSizeInBits(), VT), + DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); + return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); + } + } +} + +void SelectionDAGLegalize::ExpandNode(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + switch (Node->getOpcode()) { + case ISD::CTPOP: + case ISD::CTLZ: + case ISD::CTTZ: + Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); + Results.push_back(Tmp1); + break; + case ISD::BSWAP: + Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); + break; + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FRAME_TO_ARGS_OFFSET: + Results.push_back(DAG.getConstant(0, Node->getValueType(0))); + break; + case ISD::FLT_ROUNDS_: + Results.push_back(DAG.getConstant(1, Node->getValueType(0))); + break; + case ISD::EH_RETURN: + case ISD::DECLARE: + case ISD::DBG_LABEL: + case ISD::EH_LABEL: + case ISD::PREFETCH: + case ISD::MEMBARRIER: + case ISD::VAEND: + Results.push_back(Node->getOperand(0)); + break; + case ISD::DBG_STOPPOINT: + Results.push_back(ExpandDBG_STOPPOINT(Node)); + break; + case ISD::DYNAMIC_STACKALLOC: + ExpandDYNAMIC_STACKALLOC(Node, Results); + break; + case ISD::MERGE_VALUES: + for (unsigned i = 0; i < Node->getNumValues(); i++) + Results.push_back(Node->getOperand(i)); + break; + case ISD::UNDEF: { + MVT VT = Node->getValueType(0); + if (VT.isInteger()) + Results.push_back(DAG.getConstant(0, VT)); + else if (VT.isFloatingPoint()) + Results.push_back(DAG.getConstantFP(0, VT)); + else + assert(0 && "Unknown value type!"); + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy, + false, false, false, false, CallingConv::C, false, + DAG.getExternalSymbol("abort", TLI.getPointerTy()), + Args, DAG, dl); + 
Results.push_back(CallResult.second); + break; + } + case ISD::FP_ROUND: + case ISD::BIT_CONVERT: + Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_EXTEND: + Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getOperand(0).getValueType(), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::SIGN_EXTEND_INREG: { + // NOTE: we could fall back on load/store here too for targets without + // SAR. However, it is doubtful that any exist. + MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + unsigned BitsDiff = Node->getValueType(0).getSizeInBits() - + ExtraVT.getSizeInBits(); + SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy()); + Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), + Node->getOperand(0), ShiftCst); + Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); + Results.push_back(Tmp1); + break; + } + case ISD::FP_ROUND_INREG: { + // The only way we can lower this is to turn it into a TRUNCSTORE, + // EXTLOAD pair, targetting a temporary location (a stack slot). + + // NOTE: there is a choice here between constantly creating new stack + // slots and always reusing the same one. We currently always create + // new ones, as reuse may inhibit scheduling. + MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, + Node->getOperand(0), Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_TO_UINT: { + SDValue True, False; + MVT VT = Node->getOperand(0).getValueType(); + MVT NVT = Node->getValueType(0); + const uint64_t zero[] = {0, 0}; + APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); + APInt x = APInt::getSignBit(NVT.getSizeInBits()); + (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); + Tmp1 = DAG.getConstantFP(apf, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + Node->getOperand(0), + Tmp1, ISD::SETLT); + True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, + DAG.getNode(ISD::FSUB, dl, VT, + Node->getOperand(0), Tmp1)); + False = DAG.getNode(ISD::XOR, dl, NVT, False, + DAG.getConstant(x, NVT)); + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False); + Results.push_back(Tmp1); + break; + } + case ISD::VAARG: { + const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + MVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0); + // Increment the pointer, VAList, to the next vaarg + Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(TLI.getTargetData()-> + getTypeAllocSize(VT.getTypeForMVT()), + TLI.getPointerTy())); + // Store the incremented VAList to the legalized pointer + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0); + // Load the actual argument out of the pointer VAList + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0)); + Results.push_back(Results[0].getValue(1)); + break; + } + case ISD::VACOPY: { + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. 
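In C terms, this default VACOPY lowering treats each va_list operand as a pointer to a single pointer-sized slot and copies that slot, roughly (illustrative sketch only, not taken from the commit):

    // Generic va_copy when va_list is one pointer-sized object:
    // load the source slot, store it into the destination slot.
    void generic_va_copy(void **dest, void **src) {
      *dest = *src;   // one pointer-width load feeding one store
    }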
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); + const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); + Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), + Node->getOperand(2), VS, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_VECTOR_ELT: + if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) + // This must be an access of the only element. Return it. + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Node->getOperand(0)); + else + Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); + Results.push_back(Tmp1); + break; + case ISD::EXTRACT_SUBVECTOR: + Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); + break; + case ISD::CONCAT_VECTORS: { + // Use extract/insert/build vector for now. We might try to be + // more clever later. + SmallVector<SDValue, 8> Ops; + unsigned NumOperands = Node->getNumOperands(); + for (unsigned i=0; i < NumOperands; ++i) { + SDValue SubOp = Node->getOperand(i); + MVT VVT = SubOp.getNode()->getValueType(0); + MVT EltVT = VVT.getVectorElementType(); + unsigned NumSubElem = VVT.getVectorNumElements(); + for (unsigned j=0; j < NumSubElem; ++j) { + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, + DAG.getIntPtrConstant(j))); + } + } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Ops[0], Ops.size()); + Results.push_back(Tmp1); + break; + } + case ISD::SCALAR_TO_VECTOR: + Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); + break; + case ISD::INSERT_VECTOR_ELT: + Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0), + Node->getOperand(1), + Node->getOperand(2), dl)); + break; + case ISD::VECTOR_SHUFFLE: { + SmallVector<int, 8> Mask; + cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + + MVT VT = Node->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumElems; ++i) { + if (Mask[i] < 0) { + Ops.push_back(DAG.getUNDEF(EltVT)); + continue; + } + unsigned Idx = Mask[i]; + if (Idx < NumElems) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Node->getOperand(0), + DAG.getIntPtrConstant(Idx))); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Node->getOperand(1), + DAG.getIntPtrConstant(Idx - NumElems))); + } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_ELEMENT: { + MVT OpTy = Node->getOperand(0).getValueType(); + if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + // 1 -> Hi + Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), + DAG.getConstant(OpTy.getSizeInBits()/2, + TLI.getShiftAmountTy())); + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); + } else { + // 0 -> Lo + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), + Node->getOperand(0)); + } + Results.push_back(Tmp1); + break; + } + case ISD::STACKSAVE: + // Expand to CopyFromReg if the target set + // StackPointerRegisterToSaveRestore. 
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, + Node->getValueType(0))); + Results.push_back(Results[0].getValue(1)); + } else { + Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::STACKRESTORE: + // Expand to CopyToReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, + Node->getOperand(1))); + } else { + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::FCOPYSIGN: + Results.push_back(ExpandFCOPYSIGN(Node)); + break; + case ISD::FNEG: + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0)); + Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, + Node->getOperand(0)); + Results.push_back(Tmp1); + break; + case ISD::FABS: { + // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). + MVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = DAG.getConstantFP(0.0, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, ISD::SETUGT); + Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); + Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_PPCF128)); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, 
RTLIB::POW_F64,
+                                      RTLIB::POW_F80, RTLIB::POW_PPCF128));
+    break;
+  case ISD::FDIV:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+                                      RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+    break;
+  case ISD::FREM:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+                                      RTLIB::REM_F80, RTLIB::REM_PPCF128));
+    break;
+  case ISD::ConstantFP: {
+    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+    // Check to see if this FP immediate is already legal.
+    bool isLegal = false;
+    for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(),
+           E = TLI.legal_fpimm_end(); I != E; ++I) {
+      if (CFP->isExactlyValue(*I)) {
+        isLegal = true;
+        break;
+      }
+    }
+    // If this is a legal constant, turn it into a TargetConstantFP node.
+    if (isLegal)
+      Results.push_back(SDValue(Node, 0));
+    else
+      Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+    break;
+  }
+  case ISD::EHSELECTION: {
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::EXCEPTIONADDR: {
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::SUB: {
+    MVT VT = Node->getValueType(0);
+    assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+           TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+           "Don't know how to expand this subtraction!");
+    Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+               DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+    Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+    Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+    break;
+  }
+  case ISD::UREM:
+  case ISD::SREM: {
+    MVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    bool isSigned = Node->getOpcode() == ISD::SREM;
+    unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    Tmp2 = Node->getOperand(0);
+    Tmp3 = Node->getOperand(1);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+    } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+      // X % Y -> X - (X/Y)*Y
+      Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+      Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+      Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+    } else if (isSigned) {
+      Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32,
+                              RTLIB::SREM_I64, RTLIB::SREM_I128);
+    } else {
+      Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32,
+                              RTLIB::UREM_I64, RTLIB::UREM_I128);
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::UDIV:
+  case ISD::SDIV: {
+    bool isSigned = Node->getOpcode() == ISD::SDIV;
+    unsigned DivRemOpc = isSigned ?
ISD::SDIVREM : ISD::UDIVREM; + MVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) + Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128); + Results.push_back(Tmp1); + break; + } + case ISD::MULHU: + case ISD::MULHS: { + unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : + ISD::SMUL_LOHI; + MVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && + "If this wasn't legal, it shouldn't have been created!"); + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1.getValue(1)); + break; + } + case ISD::MUL: { + MVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + // See if multiply or divide can be lowered using two-result operations. + // We just need the low half of the multiply; try both the signed + // and unsigned forms. If the target supports both SMUL_LOHI and + // UMUL_LOHI, form a preference by checking which forms of plain + // MULH it supports. + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT); + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT); + unsigned OpToUse = 0; + if (HasSMUL_LOHI && !HasMULHS) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI && !HasMULHU) { + OpToUse = ISD::UMUL_LOHI; + } else if (HasSMUL_LOHI) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI) { + OpToUse = ISD::UMUL_LOHI; + } + if (OpToUse) { + Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0), + Node->getOperand(1))); + break; + } + Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128); + Results.push_back(Tmp1); + break; + } + case ISD::SADDO: + case ISD::SSUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + MVT OType = Node->getValueType(1); + + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Results.push_back(Cmp); + break; + } + case ISD::UADDO: + case ISD::USUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? 
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, + Node->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT)); + break; + } + case ISD::BUILD_PAIR: { + MVT PairTy = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); + Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, + DAG.getConstant(PairTy.getSizeInBits()/2, + TLI.getShiftAmountTy())); + Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); + break; + } + case ISD::SELECT: + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + if (Tmp1.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1), + Tmp2, Tmp3, + cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); + } else { + Tmp1 = DAG.getSelectCC(dl, Tmp1, + DAG.getConstant(0, Tmp1.getValueType()), + Tmp2, Tmp3, ISD::SETNE); + } + Results.push_back(Tmp1); + break; + case ISD::BR_JT: { + SDValue Chain = Node->getOperand(0); + SDValue Table = Node->getOperand(1); + SDValue Index = Node->getOperand(2); + + MVT PTy = TLI.getPointerTy(); + MachineFunction &MF = DAG.getMachineFunction(); + unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize(); + Index= DAG.getNode(ISD::MUL, dl, PTy, + Index, DAG.getConstant(EntrySize, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + + MVT MemVT = MVT::getIntegerVT(EntrySize * 8); + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + PseudoSourceValue::getJumpTable(), 0, MemVT); + Addr = LD; + if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { + // For PIC, the sequence is: + // BRIND(load(Jumptable + index) + RelocBase) + // RelocBase can be JumpTable, GOT or some sort of global base. + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, + TLI.getPICJumpTableRelocBase(Table, DAG)); + } + Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); + Results.push_back(Tmp1); + break; + } + case ISD::BRCOND: + // Expand brcond's setcc into its constituent parts and create a BR_CC + // Node. + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + if (Tmp2.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, + Tmp1, Tmp2.getOperand(2), + Tmp2.getOperand(0), Tmp2.getOperand(1), + Node->getOperand(2)); + } else { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, + DAG.getCondCode(ISD::SETNE), Tmp2, + DAG.getConstant(0, Tmp2.getValueType()), + Node->getOperand(2)); + } + Results.push_back(Tmp1); + break; + case ISD::SETCC: { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl); + + // If we expanded the SETCC into an AND/OR, return the new node + if (Tmp2.getNode() == 0) { + Results.push_back(Tmp1); + break; + } + + // Otherwise, SETCC for the given comparison type must be completely + // illegal; expand it into a SELECT_CC. 
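In source form, the fallback performed by the next few lines is simply a select of the constants 1 and 0 driven by the same comparison (illustrative sketch, not part of the commit):

    // setcc(a, b, lt)  ==>  select_cc(a, b, 1, 0, lt)
    int setcc_as_select(double a, double b) {
      return (a < b) ? 1 : 0;   // SELECT_CC picks between the two constants
    }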
+ MVT VT = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, + DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::SELECT_CC: { + Tmp1 = Node->getOperand(0); // LHS + Tmp2 = Node->getOperand(1); // RHS + Tmp3 = Node->getOperand(2); // True + Tmp4 = Node->getOperand(3); // False + SDValue CC = Node->getOperand(4); + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, CC, dl); + + assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!"); + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, + Tmp3, Tmp4, CC); + Results.push_back(Tmp1); + break; + } + case ISD::BR_CC: { + Tmp1 = Node->getOperand(0); // Chain + Tmp2 = Node->getOperand(2); // LHS + Tmp3 = Node->getOperand(3); // RHS + Tmp4 = Node->getOperand(1); // CC + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), + Tmp2, Tmp3, Tmp4, dl); + LastCALLSEQ_END = DAG.getEntryNode(); + + assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); + Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, + Tmp3, Node->getOperand(4)); + Results.push_back(Tmp1); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: + case ISD::GlobalAddress: + case ISD::GlobalTLSAddress: + case ISD::ExternalSymbol: + case ISD::ConstantPool: + case ISD::JumpTable: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + // FIXME: Custom lowering for these operations shouldn't return null! + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + Results.push_back(SDValue(Node, i)); + break; + } +} +void SelectionDAGLegalize::PromoteNode(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + MVT OVT = Node->getValueType(0); + if (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP) { + OVT = Node->getOperand(0).getValueType(); + } + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3; + switch (Node->getOpcode()) { + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + // Zero extend the argument. + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + // Perform the larger operation. 
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1); + if (Node->getOpcode() == ISD::CTTZ) { + //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), + ISD::SETEQ); + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + } else if (Node->getOpcode() == ISD::CTLZ) { + // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) + Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), NVT)); + } + Results.push_back(Tmp1); + break; + case ISD::BSWAP: { + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Tmp1); + Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); + Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(DiffBits, TLI.getShiftAmountTy())); + Results.push_back(Tmp1); + break; + } + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::FP_TO_SINT, dl); + Results.push_back(Tmp1); + break; + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::SINT_TO_FP, dl); + Results.push_back(Tmp1); + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: + assert(OVT.isVector() && "Don't know how to promote scalar logic ops"); + // Bit convert each of the values to the new type. + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + // Bit convert the result back the original type. + Results.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1)); + break; + case ISD::SELECT: + unsigned ExtOp, TruncOp; + if (Node->getValueType(0).isVector()) { + ExtOp = ISD::BIT_CONVERT; + TruncOp = ISD::BIT_CONVERT; + } else if (Node->getValueType(0).isInteger()) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + ExtOp = ISD::FP_EXTEND; + TruncOp = ISD::FP_ROUND; + } + Tmp1 = Node->getOperand(0); + // Promote each of the values to the new type. + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + // Perform the larger operation, then round down. + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3); + if (TruncOp != ISD::FP_ROUND) + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); + else + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, + DAG.getIntPtrConstant(0)); + Results.push_back(Tmp1); + break; + case ISD::VECTOR_SHUFFLE: { + SmallVector<int, 8> Mask; + cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + + // Cast the two input vectors. + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + + // Convert the shuffle mask to the right # elements. + Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::SETCC: { + // First step, figure out the appropriate operation to use. 
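An aside on the CTTZ/CTLZ/BSWAP promotions earlier in this function: for an i8 or i16 value computed in 32 bits after zero-extension, the required corrections look like this (a standalone sketch using C++20 <bit>, not part of the commit):

    #include <bit>
    #include <cstdint>

    // CTLZ promoted from i8 to i32: zero-extension adds 24 leading zeros,
    // so the wide count is reduced by NVT-bits minus OVT-bits (24 here).
    int ctlz8(uint8_t x)  { return std::countl_zero(uint32_t(x)) - 24; }

    // CTTZ promoted from i8 to i32: a zero input counts 32 trailing zeros
    // in the wide type, which the SETCC/SELECT pair remaps to 8.
    int cttz8(uint8_t x)  { return x == 0 ? 8 : std::countr_zero(uint32_t(x)); }

    // BSWAP promoted from i16 to i32: swap all four bytes, then shift the
    // result back down by the 16-bit width difference (the SRL by DiffBits).
    uint16_t bswap16(uint16_t x) {
      uint32_t w = x;
      w = (w << 24) | ((w << 8) & 0x00FF0000u) | ((w >> 8) & 0x0000FF00u) | (w >> 24);
      return uint16_t(w >> 16);
    }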
+ // Allow SETCC to not be supported for all legal data types + // Mostly this targets FP + MVT NewInTy = Node->getOperand(0).getValueType(); + MVT OldVT = NewInTy; OldVT = OldVT; + + // Scan for the appropriate larger type to use. + while (1) { + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); + + assert(NewInTy.isInteger() == OldVT.isInteger() && + "Fell off of the edge of the integer world"); + assert(NewInTy.isFloatingPoint() == OldVT.isFloatingPoint() && + "Fell off of the edge of the floating point world"); + + // If the target supports SETCC of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::SETCC, NewInTy)) + break; + } + if (NewInTy.isInteger()) + assert(0 && "Cannot promote Legal Integer SETCC yet"); + else { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp1); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp2); + } + Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Node->getOperand(2))); + break; + } + } +} + +// SelectionDAG::Legalize - This is the entry point for the file. +// +void SelectionDAG::Legalize(bool TypesNeedLegalizing, + CodeGenOpt::Level OptLevel) { + /// run - This is the main entry point to this class. + /// + SelectionDAGLegalize(*this, OptLevel).LegalizeDAG(); +} + diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp new file mode 100644 index 000000000000..c3c1beabd5f0 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -0,0 +1,1388 @@ +//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements float type expansion and softening for LegalizeTypes. +// Softening is the act of turning a computation in an illegal floating point +// type into a computation in an integer type of the same size; also known as +// "soft float". For example, turning f32 arithmetic into operations using i32. +// The resulting integer value is the same as what you would get by performing +// the floating point operation and bitcasting the result to the integer type. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. For example, +// implementing ppcf128 arithmetic in two f64 registers. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +using namespace llvm; + +/// GetFPLibCall - Return the right libcall for the given floating point type. +static RTLIB::Libcall GetFPLibCall(MVT VT, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_PPCF128) { + return + VT == MVT::f32 ? Call_F32 : + VT == MVT::f64 ? Call_F64 : + VT == MVT::f80 ? Call_F80 : + VT == MVT::ppcf128 ? Call_PPCF128 : + RTLIB::UNKNOWN_LIBCALL; +} + +//===----------------------------------------------------------------------===// +// Result Float to Integer Conversion. 
+//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Soften float result " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to soften the result of this operator!"); + abort(); + + case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; + case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; + case ISD::ConstantFP: + R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); + break; + case ISD::EXTRACT_VECTOR_ELT: + R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; + case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; + case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; + } + + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) + SetSoftenedFloat(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { + return BitConvertToInteger(N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { + // Convert the inputs to integers, and build a new pair out of them. 
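As the file header above puts it, softening recasts an illegal FP type as an integer of the same width and replaces the arithmetic with libcalls. A host-side analogy (illustrative only; soft_fadd32 is a hypothetical stand-in for the RTLIB::ADD_F32 libcall, which normally resolves to __addsf3):

    #include <cstdint>
    #include <cstring>

    // View an f32 as the i32 carrying the same bits (what
    // BitConvertToInteger produces at the DAG level).
    uint32_t soften_f32(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof bits);
      return bits;
    }

    // A softened f32 addition is then an integer-typed runtime call.
    uint32_t soft_fadd32(uint32_t a, uint32_t b);   // hypothetical soft-float routine

    uint32_t softened_fadd(float a, float b) {
      return soft_fadd32(soften_f32(a), soften_f32(b));
    }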
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), + TLI.getTypeToTransformTo(N->getValueType(0)), + BitConvertToInteger(N->getOperand(0)), + BitConvertToInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { + return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), + TLI.getTypeToTransformTo(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + NewOp.getValueType().getVectorElementType(), + NewOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned Size = NVT.getSizeInBits(); + + // Mask = ~(1 << (Size-1)) + SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1), + NVT); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(0)); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + + MVT LVT = LHS.getValueType(); + MVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // First get the sign bit of second operand. + SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), + DAG.getConstant(RSize - 1, + TLI.getShiftAmountTy())); + SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); + if (SizeDiff > 0) { + SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, TLI.getShiftAmountTy())); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); + } else if (SizeDiff < 0) { + SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); + SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy())); + } + + // Clear the sign bit of the first operand. + SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), + DAG.getConstant(LSize - 1, + TLI.getShiftAmountTy())); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); + LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); + + // Or the value with the sign bit. 
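Stripped of the DAG plumbing, the sign transfer just built is the classic integer copysign; for two operands that are already the same width it reduces to (illustrative sketch, not part of the commit):

    #include <cstdint>

    // copysign on the bit patterns of two doubles (both already softened
    // to i64): the result has LHS's magnitude and RHS's sign.
    uint64_t copysign_bits64(uint64_t lhs, uint64_t rhs) {
      const uint64_t SignBit = 1ULL << 63;
      return (lhs & (SignBit - 1))   // clear the sign bit of the first operand
           | (rhs & SignBit);        // keep only the sign bit of the second
    }

The SRL/SHL adjustments above handle the case where the two softened operands have different widths, by first moving the sign bit into the right position.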
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + 
RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), + GetSoftenedFloat(N->getOperand(0)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); + return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::i32 && + "Unsupported power type!"); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + 
RTLIB::SQRT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewL; + if (L->getExtensionType() == ISD::NON_EXTLOAD) { + NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), + NVT, L->getChain(), L->getBasePtr(), L->getOffset(), + L->getSrcValue(), L->getSrcValueOffset(), NVT, + L->isVolatile(), L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; + } + + // Do a non-extending load followed by FP_EXTEND. + NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD, + L->getMemoryVT(), L->getChain(), + L->getBasePtr(), L->getOffset(), + L->getSrcValue(), L->getSrcValueOffset(), + L->getMemoryVT(), + L->isVolatile(), L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(1)); + SDValue RHS = GetSoftenedFloat(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(2)); + SDValue RHS = GetSoftenedFloat(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewVAARG; + NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + return NewVAARG; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { + bool Signed = N->getOpcode() == ISD::SINT_TO_FP; + MVT SVT = N->getOperand(0).getValueType(); + MVT RVT = N->getValueType(0); + MVT NVT = MVT(); + DebugLoc dl = N->getDebugLoc(); + + // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to + // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly + // match. Look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE; + t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) { + NVT = (MVT::SimpleValueType)t; + // The source needs to big enough to hold the operand. + if (NVT.bitsGE(SVT)) + LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT):RTLIB::getUINTTOFP (NVT, RVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + // Sign/zero extend the argument if the libcall takes a larger type. + SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + NVT, N->getOperand(0)); + return MakeLibCall(LC, TLI.getTypeToTransformTo(RVT), &Op, 1, false, dl); +} + + +//===----------------------------------------------------------------------===// +// Operand Float to Integer Conversion.. +//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SoftenFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to soften this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; + case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// SoftenSetCCOperands - Soften the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl) { + SDValue LHSInt = GetSoftenedFloat(NewLHS); + SDValue RHSInt = GetSoftenedFloat(NewRHS); + MVT VT = NewLHS.getValueType(); + + assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!"); + + // Expand into one or more soft-fp libcall(s). + RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + switch (CCCode) { + case ISD::SETEQ: + case ISD::SETOEQ: + LC1 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32 : RTLIB::OEQ_F64; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETLE: + case ISD::SETOLE: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETGT: + case ISD::SETOGT: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUO: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + break; + case ISD::SETO: + LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64; + break; + default: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + switch (CCCode) { + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + // Fallthrough + case ISD::SETUGT: + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUGE: + LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETULT: + LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETULE: + LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETUEQ: + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; + break; + default: assert(false && "Do not know how to soften this setcc!"); + } + } + + MVT RetVT = MVT::i32; // FIXME: is this the correct return type? + SDValue Ops[2] = { LHSInt, RHSInt }; + NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewRHS = DAG.getConstant(0, RetVT); + CCCode = TLI.getCmpLibcallCC(LC1); + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { + SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), + NewLHS, NewRHS, DAG.getCondCode(CCCode)); + NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS, + NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2))); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); + NewRHS = SDValue(); + } +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) { + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + GetSoftenedFloat(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { + MVT SVT = N->getOperand(0).getValueType(); + MVT RVT = N->getValueType(0); + + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If SoftenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. 
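Concretely, SoftenSetCCOperands turns an FP comparison into one or two integer-returning runtime comparisons whose results are tested against zero. For an unordered-or-equal (SETUEQ) f32 compare, the emitted shape corresponds to the following sketch (not part of the commit; __unordsf2 and __eqsf2 are the usual libgcc/compiler-rt comparison routines these RTLIB entries map to):

    // f32 SETUEQ after softening: "unordered(a, b) || a == b", built from
    // two libcalls whose integer results are each compared against zero.
    extern "C" int __unordsf2(float a, float b);  // nonzero if either is NaN
    extern "C" int __eqsf2(float a, float b);     // zero iff ordered and equal

    bool setueq_f32(float a, float b) {
      return __unordsf2(a, b) != 0 || __eqsf2(a, b) == 0;
    }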
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { + MVT RVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { + MVT RVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If SoftenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If SoftenSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only soften the stored value!"); + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + DebugLoc dl = N->getDebugLoc(); + + if (ST->isTruncatingStore()) + // Do an FP_ROUND followed by a non-truncating store. + Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), + Val, DAG.getIntPtrConstant(0))); + else + Val = GetSoftenedFloat(Val); + + return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->isVolatile(), ST->getAlignment()); +} + + +//===----------------------------------------------------------------------===// +// Float Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion, we just +/// know that (at least) one result needs expansion. 
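For ppcf128 this expansion models one value as a pair of f64s whose sum is the number. A small illustration of the representation and of the FABS rule used by ExpandFloatRes_FABS below (an editorial sketch, not part of the commit):

    #include <cmath>

    // ppcf128 ("double-double"): the value is Hi + Lo, where Hi is the f64
    // nearest the full value and Lo carries the remaining rounding error.
    struct DoubleDouble { double Hi, Lo; };

    // FABS on the pair: take |Hi|, and flip Lo only if Hi's sign changed,
    // mirroring the SELECT_CC in ExpandFloatRes_FABS.
    DoubleDouble dd_fabs(DoubleDouble x) {
      DoubleDouble r;
      r.Hi = std::fabs(x.Hi);
      r.Lo = (r.Hi == x.Hi) ? x.Lo : -x.Lo;
      return r;
    }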
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Expand float result: "; N->dump(&DAG); cerr << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ExpandFloatResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to expand the result of this operator!"); + abort(); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + + case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; + case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; + case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; + case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; + case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; + case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; + case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; + case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; + case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; + case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; + case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; + case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break; + case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; + case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; + case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; + case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. 
+ if (Lo.getNode()) + SetExpandedFloat(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + assert(NVT.getSizeInBits() == integerPartWidth && + "Do not know how to expand this float constant!"); + APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); + Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1, + &C.getRawData()[1])), NVT); + Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1, + &C.getRawData()[0])), NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + DebugLoc dl = N->getDebugLoc(); + SDValue Tmp; + GetExpandedFloat(N->getOperand(0), Lo, Tmp); + Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); + // Lo = Hi==fabs(Hi) ? Lo : -Lo; + Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), + DAG.getCondCode(ISD::SETEQ)); +} + +void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32,RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + 
RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32,RTLIB::LOG10_F64, + RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedFloat(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = 
LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + DebugLoc dl = N->getDebugLoc(); + + MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getMemoryVT(), + LD->isVolatile(), LD->getAlignment()); + + // Remember the chain. + Chain = Hi.getValue(1); + + // The low part is zero. + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + + // Modified the chain - switch anything that used the old chain to use the + // new one. + ReplaceValueWith(SDValue(LD, 1), Chain); +} + +void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Src = N->getOperand(0); + MVT SrcVT = Src.getValueType(); + bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; + DebugLoc dl = N->getDebugLoc(); + + // First do an SINT_TO_FP, whether the original was signed or unsigned. + // When promoting partial word types to i32 we must honor the signedness, + // though. + if (SrcVT.bitsLE(MVT::i32)) { + // The integer can be represented exactly in an f64. + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i32, Src); + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); + } else { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (SrcVT.bitsLE(MVT::i64)) { + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i64, Src); + LC = RTLIB::SINTTOFP_I64_PPCF128; + } else if (SrcVT.bitsLE(MVT::i128)) { + Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src); + LC = RTLIB::SINTTOFP_I128_PPCF128; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + Hi = MakeLibCall(LC, VT, &Src, 1, true, dl); + GetPairElements(Hi, Lo, Hi); + } + + if (isSigned) + return; + + // Unsigned - fix up the SINT_TO_FP value just calculated. + Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi); + SrcVT = Src.getValueType(); + + // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128. 
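+  // Each table below is the ppcf128 bit pattern of 2^N: one double word holds
+  // 2^N exactly (0x41f... = 2^32, 0x43f... = 2^64, 0x47f... = 2^128) and the
+  // other double word is zero.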
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 }; + static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 }; + static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; + const uint64_t *Parts = 0; + + switch (SrcVT.getSimpleVT()) { + default: + assert(false && "Unsupported UINT_TO_FP!"); + case MVT::i32: + Parts = TwoE32; + break; + case MVT::i64: + Parts = TwoE64; + break; + case MVT::i128: + Parts = TwoE128; + break; + } + + Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, + DAG.getConstantFP(APFloat(APInt(128, 2, Parts)), + MVT::ppcf128)); + Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, DAG.getCondCode(ISD::SETLT)); + GetPairElements(Lo, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Float Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatOperand - This method is called when the specified operand of the +/// specified node is found to need expansion. At this point, all of the result +/// types of the node are known to be legal, but other operands of the node may +/// need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Expand float operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) + == TargetLowering::Custom) + Res = TLI.LowerOperation(SDValue(N, 0), DAG); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + cerr << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; + #endif + assert(0 && "Do not know how to expand this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedFloat(NewLHS, LHSLo, LHSHi); + GetExpandedFloat(NewRHS, RHSLo, RHSHi); + + MVT VT = NewLHS.getValueType(); + assert(VT == MVT::ppcf128 && "Unsupported setcc type!"); + + // FIXME: This generated code sucks. 
We want to generate + // FCMPU crN, hi1, hi2 + // BNE crN, L: + // FCMPU crN, lo1, lo2 + // The following can be improved, but not that much. + SDValue Tmp1, Tmp2, Tmp3; + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETOEQ); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, CCCode); + Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETUNE); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode); + Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); + NewRHS = SDValue(); // LHS is the result, not a compare. +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(0), Lo, Hi); + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + N->getValueType(0), Hi, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { + MVT RVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, + N->getOperand(0), DAG.getValueType(MVT::f64)); + Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, + DAG.getIntPtrConstant(1)); + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + } + + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { + MVT RVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; + APFloat APF = APFloat(APInt(128, 2, TwoE31)); + SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); + // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X + // FIXME: generated code sucks. 
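+    // Tmp is 2^31 as a ppcf128 (0x41e0000000000000 is the double bit pattern
+    // of 2^31).  Values below 2^31 convert directly with FP_TO_SINT; values
+    // >= 2^31 are rebased by 2^31 first and 0x80000000 is added back.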
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + DAG.getCondCode(ISD::SETGE)); + } + + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { + if (ISD::isNormalStore(N)) + return ExpandOp_NormalStore(N, OpNo); + + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + StoreSDNode *ST = cast<StoreSDNode>(N); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + MVT NVT = TLI.getTypeToTransformTo(ST->getValue().getValueType()); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + SDValue Lo, Hi; + GetExpandedOp(ST->getValue(), Lo, Hi); + + return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp new file mode 100644 index 000000000000..eb9342cc8b8e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -0,0 +1,2382 @@ +//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer type expansion and promotion for LegalizeTypes. 
+// Promotion is the act of changing a computation in an illegal type into a +// computation in a larger type. For example, implementing i8 arithmetic in an +// i32 register (often needed on powerpc). +// Expansion is the act of changing a computation in an illegal type into a +// computation in two identical registers of a smaller type. For example, +// implementing i64 arithmetic in two i32 registers (often needed on 32-bit +// targets). +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Integer Result Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerResult - This method is called when a result of a node is +/// found to be in need of promotion to a larger type. At this point, the node +/// may also have invalid operands or may have other results that need +/// expansion, we just know that (at least) one result needs promotion. +void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Promote integer result: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "PromoteIntegerResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to promote this operator!"); + abort(); + case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; + case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; + case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break; + case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; + case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; + case ISD::Constant: Res = PromoteIntRes_Constant(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntRes_CONVERT_RNDSAT(N); break; + case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; + case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; + case ISD::EXTRACT_VECTOR_ELT: + Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; + case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; + case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; + case ISD::SIGN_EXTEND_INREG: + Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; + case ISD::SRA: Res = PromoteIntRes_SRA(N); break; + case ISD::SRL: Res = PromoteIntRes_SRL(N); break; + case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; + case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; + case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; + + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + + case ISD::SDIV: + case ISD::SREM: Res = PromoteIntRes_SDIV(N); break; + + case ISD::UDIV: + case ISD::UREM: Res = PromoteIntRes_UDIV(N); break; + + case ISD::SADDO: + case 
ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; + case ISD::UADDO: + case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break; + case ISD::SMULO: + case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: + Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; + + case ISD::ATOMIC_CMP_SWAP: + Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break; + } + + // If the result is null then the sub-method took care of registering it. + if (Res.getNode()) + SetPromotedInteger(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { + // Sign-extend the new bits, and continue the assertion. + SDValue Op = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertSext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { + // Zero the new bits, and continue the assertion. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertZext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), + N->getChain(), N->getBasePtr(), + Op2, N->getSrcValue(), N->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), N->getChain(), N->getBasePtr(), + Op2, Op3, N->getSrcValue(), N->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + MVT NInVT = TLI.getTypeToTransformTo(InVT); + MVT OutVT = N->getValueType(0); + MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + break; + case Legal: + break; + case PromoteInteger: + if (NOutVT.bitsEq(NInVT)) + // The input promotes to the same size. Convert the promoted value. + return DAG.getNode(ISD::BIT_CONVERT, dl, + NOutVT, GetPromotedInteger(InOp)); + break; + case SoftenFloat: + // Promote the integer operand by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case ExpandInteger: + case ExpandFloat: + break; + case ScalarizeVector: + // Convert the element to an integer and promote it by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + case SplitVector: { + // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split + // pieces of the input into integers and reassemble in the final type. 
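+    // Each half is bit-converted to an integer, the halves are joined in
+    // endian order, and the result is extended to the promoted output width.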
+ SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + InOp = DAG.getNode(ISD::ANY_EXTEND, dl, + MVT::getIntegerVT(NOutVT.getSizeInBits()), + JoinIntegers(Lo, Hi)); + return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); + } + case WidenVector: + if (OutVT.bitsEq(NInVT)) + // The input is widened to the same size. Convert to the widened value. + return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); + } + + // Otherwise, lower the bit-convert to a store/load from the stack. + // Create the stack frame object. Make sure it is aligned for both + // the source and destination types. + SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); + + // Result is an extending load from the stack slot. + return DAG.getExtLoad(ISD::EXTLOAD, dl, NOutVT, Store, FIPtr, SV, 0, OutVT); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + MVT OVT = N->getValueType(0); + MVT NVT = Op.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, TLI.getPointerTy())); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { + // The pair element type may be legal, or may not promote to the same type as + // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), + TLI.getTypeToTransformTo(N->getValueType(0)), + JoinIntegers(N->getOperand(0), N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { + MVT VT = N->getValueType(0); + // FIXME there is no actual debug info here + DebugLoc dl = N->getDebugLoc(); + // Zero extend things like i1, sign extend everything else. It shouldn't + // matter in theory which one we pick, but this tends to give better code? + unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(VT), + SDValue(N, 0)); + assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); + return Result; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && + "can only promote integers"); + MVT OutVT = TLI.getTypeToTransformTo(N->getValueType(0)); + return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + DebugLoc dl = N->getDebugLoc(); + MVT OVT = N->getValueType(0); + MVT NVT = Op.getValueType(); + Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); + // Subtract off the extra leading bits in the bigger type. 
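+  // For example, promoting an i8 count to i32: the zero-extended value has 24
+  // extra leading zero bits, so 32 - 8 = 24 is subtracted from the i32 result.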
+ return DAG.getNode(ISD::SUB, dl, NVT, Op, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), NVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + MVT OVT = N->getValueType(0); + MVT NVT = Op.getValueType(); + DebugLoc dl = N->getDebugLoc(); + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.set(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + return DAG.getNode(ISD::CTTZ, dl, NVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { + MVT OldVT = N->getValueType(0); + SDValue OldVec = N->getOperand(0); + if (getTypeAction(OldVec.getValueType()) == WidenVector) + OldVec = GetWidenedVector(N->getOperand(0)); + unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + if (OldElts == 1) { + assert(!isTypeLegal(OldVec.getValueType()) && + "Legal one-element vector of a type needing promotion!"); + // It is tempting to follow GetScalarizedVector by a call to + // GetPromotedInteger, but this would be wrong because the + // scalarized value may not yet have been processed. + return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), + GetScalarizedVector(OldVec)); + } + + // Convert to a vector half as long with an element type of twice the width, + // for example <4 x i16> -> <2 x i32>. + assert(!(OldElts & 1) && "Odd length vectors not supported!"); + MVT NewVT = MVT::getIntegerVT(2 * OldVT.getSizeInBits()); + assert(OldVT.isSimple() && NewVT.isSimple()); + + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::getVectorVT(NewVT, OldElts / 2), + OldVec); + + // Extract the element at OldIdx / 2 from the new vector. + SDValue OldIdx = N->getOperand(1); + SDValue NewIdx = DAG.getNode(ISD::SRL, dl, OldIdx.getValueType(), OldIdx, + DAG.getConstant(1, TLI.getPointerTy())); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, NewIdx); + + // Select the appropriate half of the element: Lo if OldIdx was even, + // Hi if it was odd. + SDValue Lo = Elt; + SDValue Hi = DAG.getNode(ISD::SRL, dl, NewVT, Elt, + DAG.getConstant(OldVT.getSizeInBits(), + TLI.getPointerTy())); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Extend to the promoted type. + SDValue Odd = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, OldIdx); + SDValue Res = DAG.getNode(ISD::SELECT, dl, NewVT, Odd, Hi, Lo); + return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned NewOpc = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); + + // If we're promoting a UINT to a larger size, check to see if the new node + // will be legal. If it isn't, check to see if FP_TO_SINT is legal, since + // we can use that instead. This allows us to generate better code for + // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not + // legal, such as PowerPC. 
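+  // This is safe because the promoted type is strictly wider, so every value
+  // of the original unsigned type is representable in the signed promoted type.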
+ if (N->getOpcode() == ISD::FP_TO_UINT && + !TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) + NewOpc = ISD::FP_TO_SINT; + + SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + + // Assert that the converted value fits in the original type. If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + ISD::AssertZext : ISD::AssertSext, dl, + NVT, Res, DAG.getValueType(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { + SDValue Res = GetPromotedInteger(N->getOperand(0)); + assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); + + // If the result and operand types are the same after promotion, simplify + // to an in-register extension. + if (NVT == Res.getValueType()) { + // The high bits are not guaranteed to be anything. Insert an extend. + if (N->getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(N->getOperand(0).getValueType())); + if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType()); + assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); + return Res; + } + } + + // Otherwise, just extend the original operand all the way to the larger type. + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + ISD::LoadExtType ExtType = + ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); + DebugLoc dl = N->getDebugLoc(); + SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT(), N->isVolatile(), + N->getAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +/// Promote the overflow flag of an overflowing arithmetic node. +SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { + // Simply change the return type of the boolean result. + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(1)); + MVT ValueVTs[] = { N->getValueType(0), NVT }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + DAG.getVTList(ValueVTs, 2), Ops, 2); + + // Modified the sum result - switch anything that used the old sum to use + // the new one. + ReplaceValueWith(SDValue(N, 0), Res); + + return SDValue(Res.getNode(), 1); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // sign extension of its truncation to the original type. 
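+  // For example, for i8 promoted to i32: add/sub the sign-extended operands in
+  // i32, then report overflow if sign_extend_inreg(Res, i8) differs from Res.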
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + MVT OVT = N->getOperand(0).getValueType(); + MVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: sign extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(OVT)); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(2)); + SDValue RHS = GetPromotedInteger(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { + MVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + assert(isTypeLegal(SVT) && "Illegal SetCC type!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the SETCC result using the canonical SETCC type. + SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); + + // Convert to the expected type. + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); + return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { + return DAG.getNode(ISD::SHL, N->getDebugLoc(), + TLI.getTypeToTransformTo(N->getValueType(0)), + GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { + // The input may have strange things in the top bits of the registers, but + // these operations don't care. They may have weird bits going out, but + // that too is okay if they are integer operations. + SDValue LHS = GetPromotedInteger(N->getOperand(0)); + SDValue RHS = GetPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { + // The input value must be properly sign extended. 
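+  // An arithmetic shift fills from the sign bit, so the promoted value's extra
+  // high bits must be copies of the original sign bit for the result to match.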
+ SDValue Res = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SRA, N->getDebugLoc(), + Res.getValueType(), Res, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { + // The input value must be properly zero extended. + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Res = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Res; + + switch (getTypeAction(N->getOperand(0).getValueType())) { + default: assert(0 && "Unknown type action!"); + case Legal: + case ExpandInteger: + Res = N->getOperand(0); + break; + case PromoteInteger: + Res = GetPromotedInteger(N->getOperand(0)); + break; + } + + // Truncate to NVT instead of VT + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // zero extension of its truncation to the original type. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + MVT OVT = N->getOperand(0).getValueType(); + MVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: zero extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + MVT RegVT = TLI.getRegisterType(VT); + unsigned NumRegs = TLI.getNumRegisters(VT); + // The argument is passed as NumRegs registers of type RegVT. + + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2)); + Chain = Parts[i].getValue(1); + } + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::reverse(Parts.begin(), Parts.end()); + + // Assemble the parts in the promoted type. + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]); + for (unsigned i = 1; i < NumRegs; ++i) { + SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); + // Shift it to the right position and "or" it in. 
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part, + DAG.getConstant(i * RegVT.getSizeInBits(), + TLI.getPointerTy())); + Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); + } + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + assert(ResNo == 1 && "Only boolean result promotion currently supported!"); + return PromoteIntRes_Overflow(N); +} + +//===----------------------------------------------------------------------===// +// Integer Operand Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need promotion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Promote integer operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + cerr << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; + #endif + assert(0 && "Do not know how to promote this operator's operand!"); + abort(); + + case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; + case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; + case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; + case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; + case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntOp_CONVERT_RNDSAT(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; + case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; + case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; + case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; + case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; + case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// PromoteSetCCOperands - Promote the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. 
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, + ISD::CondCode CCCode) { + // We have to insert explicit sign or zero extends. Note that we could + // insert sign extends for ALL conditions, but zero extend is cheaper on + // many machines (an AND instead of two shifts), so prefer it. + switch (CCCode) { + default: assert(0 && "Unknown integer comparison!"); + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGE: + case ISD::SETUGT: + case ISD::SETULE: + case ISD::SETULT: + // ALL of these operations will work if we either sign or zero extend + // the operands (including the unsigned comparisons!). Zero extend is + // usually a simpler/cheaper operation, so prefer it. + NewLHS = ZExtPromotedInteger(NewLHS); + NewRHS = ZExtPromotedInteger(NewRHS); + break; + case ISD::SETGE: + case ISD::SETGT: + case ISD::SETLT: + case ISD::SETLE: + NewLHS = SExtPromotedInteger(NewLHS); + NewRHS = SExtPromotedInteger(NewRHS); + break; + } +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) { + // This should only occur in unusual situations like bitcasting to an + // x86_fp80, so just turn it into a store+load + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 2 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get()); + + // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always + // legal types. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "only know how to promote condition"); + + // Promote all the way up to the canonical SetCC type. + MVT SVT = TLI.getSetCCResultType(MVT::Other); + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); + + // The chain (Op#0) and basic block destination (Op#2) are always legal types. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond, + N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { + // Since the result type is legal, the operands must promote to it. + MVT OVT = N->getOperand(0).getValueType(); + SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); + SDValue Hi = GetPromotedInteger(N->getOperand(1)); + assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); + DebugLoc dl = N->getDebugLoc(); + + Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, + DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type is not. This implies + // that the vector is a power-of-two in length and that the element + // type does not have a strange size (eg: it is not i1). + MVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + + // Promote the inserted value. The type does not need to match the + // vector element type. 
Check that any extra bits introduced will be + // truncated away. + assert(N->getOperand(0).getValueType().getSizeInBits() >= + N->getValueType(0).getVectorElementType().getSizeInBits() && + "Type of inserted value narrower than vector element type!"); + + SmallVector<SDValue, 16> NewOps; + for (unsigned i = 0; i < NumElts; ++i) + NewOps.push_back(GetPromotedInteger(N->getOperand(i))); + + return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && + "can only promote integer arguments"); + SDValue InOp = GetPromotedInteger(N->getOperand(0)); + return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp, + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, + unsigned OpNo) { + if (OpNo == 1) { + // Promote the inserted value. This is valid because the type does not + // have to match the vector element type. + + // Check that any extra bits introduced will be truncated away. + assert(N->getOperand(1).getValueType().getSizeInBits() >= + N->getValueType(0).getVectorElementType().getSizeInBits() && + "Type of inserted value narrower than vector element type!"); + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + GetPromotedInteger(N->getOperand(1)), + N->getOperand(2)); + } + + assert(OpNo == 2 && "Different operand and result vector types?"); + + // Promote the index. + SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + N->getOperand(1), Idx); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { + SDValue NewOps[6]; + DebugLoc dl = N->getDebugLoc(); + NewOps[0] = N->getOperand(0); + for (unsigned i = 1; i < array_lengthof(NewOps); ++i) { + SDValue Flag = GetPromotedInteger(N->getOperand(i)); + NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); + } + return DAG.UpdateNodeOperands(SDValue (N, 0), NewOps, + array_lengthof(NewOps)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { + // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote + // the operand in place. + return DAG.UpdateNodeOperands(SDValue(N, 0), + GetPromotedInteger(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Only know how to promote condition"); + + // Promote all the way up to the canonical SetCC type. + MVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); + SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); + + return DAG.UpdateNodeOperands(SDValue(N, 0), Cond, + N->getOperand(1), N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get()); + + // The CC (#4) and the possible return values (#2 and #3) have legal types. 
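+  // Only the comparison operands (#0 and #1) needed promotion here.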
+ return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get()); + + // The CC (#2) is always legal. + return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + DebugLoc dl = N->getDebugLoc(); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), + Op, DAG.getValueType(N->getOperand(0).getValueType())); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { + return DAG.UpdateNodeOperands(SDValue(N, 0), + SExtPromotedInteger(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. + + // Truncate the value and store the result. + return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), + SVOffset, N->getMemoryVT(), + isVolatile, Alignment); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { + return DAG.UpdateNodeOperands(SDValue(N, 0), + ZExtPromotedInteger(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue Op = GetPromotedInteger(N->getOperand(0)); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); +} + + +//===----------------------------------------------------------------------===// +// Integer Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion, we just +/// know that (at least) one result needs expansion. +void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Expand integer result: "; N->dump(&DAG); cerr << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. 
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to expand the result of this operator!"); + abort(); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + + case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; + case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; + case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; + case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; + case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; + case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; + case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break; + case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break; + case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break; + case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; + case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + + case ISD::AND: + case ISD::OR: + case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; + + case ISD::ADD: + case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break; + + case ISD::ADDC: + case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break; + + case ISD::ADDE: + case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetExpandedInteger(SDValue(N, ResNo), Lo, Hi); +} + +/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded, +/// and the shift amount is a constant 'Amt'. Expand the operation. 
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the incoming operand to be shifted, so that we have its parts + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + MVT NVT = InL.getValueType(); + unsigned VTBits = N->getValueType(0).getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + MVT ShTy = N->getOperand(1).getValueType(); + + if (N->getOpcode() == ISD::SHL) { + if (Amt > VTBits) { + Lo = Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getNode(ISD::SHL, dl, + NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy)); + } else if (Amt == NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = InL; + } else if (Amt == 1 && + TLI.isOperationLegalOrCustom(ISD::ADDC, + TLI.getTypeToExpandTo(NVT))) { + // Emit this X << 1 as X+X. + SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDValue LoOps[2] = { InL, InL }; + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(NVTBits-Amt, ShTy))); + } + return; + } + + if (N->getOpcode() == ISD::SRL) { + if (Amt > VTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRL, dl, + NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); + Hi = DAG.getConstant(0, NVT); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getConstant(0, NVT); + } else { + Lo = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + } + return; + } + + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + if (Amt > VTBits) { + Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(Amt-NVTBits, ShTy)); + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else { + Lo = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + } +} + +/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify +/// this shift based on knowledge of the high bit of the shift amount. If we +/// can tell this, we know that it is >= 32 or < 32, without knowing the actual +/// shift amount. 
+bool DAGTypeLegalizer:: +ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + MVT ShTy = Amt.getValueType(); + unsigned ShBits = ShTy.getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + + // If we don't know anything about the high bits, exit. + if (((KnownZero|KnownOne) & HighBitMask) == 0) + return false; + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + // If we know that any of the high bits of the shift amount are one, then we + // can do this as a couple of simple shifts. + if (KnownOne.intersects(HighBitMask)) { + // Mask out the high bit, which we know is set. + Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, + DAG.getConstant(~HighBitMask, ShTy)); + + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: + Lo = DAG.getConstant(0, NVT); // Low part is zero. + Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + return true; + case ISD::SRL: + Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + } + } + +#if 0 + // FIXME: This code is broken for shifts with a zero amount! + // If we know that all of the high bits of the shift amount are zero, then we + // can do this as a couple of simple shifts. + if ((KnownZero & HighBitMask) == HighBitMask) { + // Compute 32-amt. + SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, + DAG.getConstant(NVTBits, ShTy), + Amt); + unsigned Op1, Op2; + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; + case ISD::SRL: + case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; + } + + Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, NVT, + DAG.getNode(Op1, NVT, InH, Amt), + DAG.getNode(Op2, NVT, InL, Amt2)); + return true; + } +#endif + + return false; +} + +/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift +/// of any size. +bool DAGTypeLegalizer:: +ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + MVT ShTy = Amt.getValueType(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the incoming operand to be shifted. 
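+  // Strategy sketch: both candidate results are built unconditionally below,
+  // one assuming Amt < NVTBits and one assuming Amt >= NVTBits, and a
+  // SETULT(Amt, NVTBits) comparison selects between them for each half.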
+ SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); + SDValue Amt2 = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); + + SDValue Lo1, Hi1, Lo2, Hi2; + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: + // ShAmt < NVTBits + Lo1 = DAG.getConstant(0, NVT); // Low part is zero. + Hi1 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + + // ShAmt >= NVTBits + Lo2 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + Hi2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + case ISD::SRL: + // ShAmt < NVTBits + Hi1 = DAG.getConstant(0, NVT); // Hi part is zero. + Lo1 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + + // ShAmt >= NVTBits + Hi2 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + Lo2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + case ISD::SRA: + // ShAmt < NVTBits + Hi1 = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo1 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + + // ShAmt >= NVTBits + Hi2 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + Lo2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + } + + return false; +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + + MVT NVT = LHSL.getValueType(); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support + // them. TODO: Teach operation legalization how to expand unsupported + // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate + // a carry of type MVT::Flag, but there doesn't seem to be any way to + // generate a value of this type in the expanded code sequence. + bool hasCarry = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
+ ISD::ADDC : ISD::SUBC, + TLI.getTypeToExpandTo(NVT)); + + if (hasCarry) { + SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + } else { + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + } else { + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + SDValue Cmp = + DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); + } + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + if (N->getOpcode() == ISD::ADDC) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is any extension of the input (which degenerates to a copy). + Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op); + Hi = DAG.getUNDEF(NVT); // The high part is undefined. 
+ } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, + DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part replicates the sign bit of Lo, make it explicit. + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(NVTBits-1, TLI.getPointerTy())); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, + DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part must be zero, make it explicit. + Hi = DAG.getConstant(0, NVT); + } +} + +void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned NBitWidth = NVT.getSizeInBits(); + const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); + Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // ctlz (HiLo) -> Hi != 0 ? 
ctlz(Hi) : (ctlz(Lo)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + + SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, + DAG.getConstant(0, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), + DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + + SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, + DAG.getConstant(0, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + MVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); + SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + MVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); + SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, + SDValue &Lo, SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + ISD::LoadExtType ExtType = N->getExtensionType(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + DebugLoc dl = N->getDebugLoc(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + MVT EVT = N->getMemoryVT(); + + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + EVT, isVolatile, Alignment); + + // Remember the chain. + Ch = Lo.getValue(1); + + if (ExtType == ISD::SEXTLOAD) { + // The high part is obtained by SRA'ing all but one of the bits of the + // lo part. 
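+      // E.g. with i32 parts, Hi = Lo >>s 31, replicating the sign bit of the
+      // loaded value across the whole high word.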
+ unsigned LoSize = Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else if (ExtType == ISD::ZEXTLOAD) { + // The high part is just a zero. + Hi = DAG.getConstant(0, NVT); + } else { + assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); + // The high part is undefined. + Hi = DAG.getUNDEF(NVT); + } + } else if (TLI.isLittleEndian()) { + // Little-endian - low bits are at low addresses. + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + isVolatile, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + MVT NEVT = MVT::getIntegerVT(ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, NEVT, + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + } else { + // Big-endian - high bits are at low addresses. Favor aligned loads at + // the cost of some bit-fiddling. + MVT EVT = N->getMemoryVT(); + unsigned EBytes = EVT.getStoreSizeInBits()/8; + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + + // Load both the high bits and maybe some of the low bits. + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits), + isVolatile, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Load the rest of the low bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, + MVT::getIntegerVT(ExcessBits), + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer low bits from the bottom of Hi to the top of Lo. + Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + // Move high bits to the right position in Hi. + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, + NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + } + } + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); +} + +void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH); +} + +void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = NVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + return; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHS) { + // We can emit a mulhs+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL); + return; + } + } + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(NVT, NVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + } + + // If nothing else, we can make a libcall. 
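+  // E.g. expanding an i64 multiply when neither MULH* nor *MUL_LOHI is
+  // available selects MUL_I64, which typically resolves to the __muldi3
+  // routine of the compiler runtime (the exact name depends on the runtime).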
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (VT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (VT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (VT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::SDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::SDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::SDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // If we can emit an efficient shift operation, do so now. Check to see if + // the RHS is a constant. + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi); + + // If we can determine that the high bit of the shift is zero or one, even if + // the low bits are variable, emit this shift in an optimized form. + if (ExpandShiftWithKnownAmountBit(N, Lo, Hi)) + return; + + // If this target supports shift_PARTS, use it. First, map to the _PARTS opc. + unsigned PartsOpc; + if (N->getOpcode() == ISD::SHL) { + PartsOpc = ISD::SHL_PARTS; + } else if (N->getOpcode() == ISD::SRL) { + PartsOpc = ISD::SRL_PARTS; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + PartsOpc = ISD::SRA_PARTS; + } + + // Next check to see if the target supports this SHL_PARTS operation or if it + // will custom expand it. + MVT NVT = TLI.getTypeToTransformTo(VT); + TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); + if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || + Action == TargetLowering::Custom) { + // Expand the subcomponents. + SDValue LHSL, LHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + + SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) }; + MVT VT = LHSL.getValueType(); + Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); + Hi = Lo.getValue(1); + return; + } + + // Otherwise, emit a libcall. 
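+  // E.g. a variable i64 shift with no usable SHL/SRA/SRL_PARTS support
+  // becomes a SHL_I64/SRA_I64/SRL_I64 libcall, commonly named
+  // __ashldi3/__ashrdi3/__lshrdi3 in libgcc-style runtimes (names vary).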
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + bool isSigned; + if (N->getOpcode() == ISD::SHL) { + isSigned = false; /*sign irrelevant*/ + if (VT == MVT::i16) + LC = RTLIB::SHL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SHL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SHL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SHL_I128; + } else if (N->getOpcode() == ISD::SRL) { + isSigned = false; + if (VT == MVT::i16) + LC = RTLIB::SRL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRL_I128; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + isSigned = true; + if (VT == MVT::i16) + LC = RTLIB::SRA_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRA_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRA_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRA_I128; + } + + if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + return; + } + + if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi)) + assert(0 && "Unsupported shift!"); +} + +void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is sign extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0)); + // The high part is obtained by SRA'ing all but one of the bits of low part. + unsigned LoSize = NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + } +} + +void DAGTypeLegalizer:: +ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + + if (EVT.bitsLE(Lo.getValueType())) { + // sext_inreg the low part if needed. + Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo, + N->getOperand(1)); + + // The high part gets the sign extension from the lo-part. This handles + // things like sextinreg V:i64 from i8. + Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, + DAG.getConstant(Hi.getValueType().getSizeInBits()-1, + TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. Leave the low part alone, + // sext_inreg the high part. 
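+    // E.g. sext_inreg from i48 of a value expanded into two i32 halves:
+    // ExcessBits = 48 - 32 = 16, so only the top half is sign-extended in
+    // register from its low 16 bits; the bottom half is already correct.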
+ unsigned ExcessBits = + EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::SRL, dl, + N->getOperand(0).getValueType(), N->getOperand(0), + DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::UDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::UDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::UDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::UREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::UREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::UREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is zero extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + Hi = DAG.getConstant(0, NVT); // The high part is just a zero. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. 
+ SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getZeroExtendInReg(Hi, dl, MVT::getIntegerVT(ExcessBits)); + } +} + + +//===----------------------------------------------------------------------===// +// Integer Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need expansion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Expand integer operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + cerr << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; + #endif + assert(0 && "Do not know how to expand this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; + case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; + case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(NewLHS, LHSLo, LHSHi); + GetExpandedInteger(NewRHS, RHSLo, RHSHi); + + MVT VT = NewLHS.getValueType(); + + if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { + if (RHSLo == RHSHi) { + if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { + if (RHSCST->isAllOnesValue()) { + // Equality comparison to -1. 
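+        // Both halves are all-ones iff their AND is all-ones, so comparing
+        // (LHSLo & LHSHi) against the all-ones low half avoids the XOR/OR
+        // sequence used for the general equality case below.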
+ NewLHS = DAG.getNode(ISD::AND, dl, + LHSLo.getValueType(), LHSLo, LHSHi); + NewRHS = RHSLo; + return; + } + } + } + + NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo); + NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi); + NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS); + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + return; + } + + // If this is a comparison of the sign bit, just look at the top part. + // X > -1, x < 0 + if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS)) + if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0 + (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1 + NewLHS = LHSHi; + NewRHS = RHSHi; + return; + } + + // FIXME: This generated code sucks. + ISD::CondCode LowCC; + switch (CCCode) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETLT: + case ISD::SETULT: LowCC = ISD::SETULT; break; + case ISD::SETGT: + case ISD::SETUGT: LowCC = ISD::SETUGT; break; + case ISD::SETLE: + case ISD::SETULE: LowCC = ISD::SETULE; break; + case ISD::SETGE: + case ISD::SETUGE: LowCC = ISD::SETUGE; break; + } + + // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison + // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands + // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2; + + // NOTE: on targets without efficient SELECT of bools, we can always use + // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL); + SDValue Tmp1, Tmp2; + Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); + if (!Tmp1.getNode()) + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, LowCC); + Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); + if (!Tmp2.getNode()) + Tmp2 = DAG.getNode(ISD::SETCC, dl, + TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, DAG.getCondCode(CCCode)); + + ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode()); + ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode()); + if ((Tmp1C && Tmp1C->isNullValue()) || + (Tmp2C && Tmp2C->isNullValue() && + (CCCode == ISD::SETLE || CCCode == ISD::SETGE || + CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) || + (Tmp2C && Tmp2C->getAPIntValue() == 1 && + (CCCode == ISD::SETLT || CCCode == ISD::SETGT || + CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) { + // low part is known false, returns high part. + // For LE / GE, if high part is known false, ignore the low part. + // For LT / GT, if high part is known true, ignore the low part. + NewLHS = Tmp2; + NewRHS = SDValue(); + return; + } + + NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETEQ, false, + DagCombineInfo, dl); + if (!NewLHS.getNode()) + NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETEQ); + NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(), + NewLHS, Tmp1, Tmp2); + NewRHS = SDValue(); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. 
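+  // This happens when the helper above already materialized a full
+  // setcc/select sequence for the two halves and handed back a bare boolean;
+  // rewriting it as (bool != 0) lets the BR_CC node be reused unchanged.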
+ if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { + // The value being shifted is legal, but the shift amount is too big. + // It follows that either the result of the shift is undefined, or the + // upper half of the shift amount is zero. Just use the lower half. + SDValue Lo, Hi; + GetExpandedInteger(N->getOperand(1), Lo, Hi); + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { + SDValue Op = N->getOperand(0); + MVT DstVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this SINT_TO_FP!"); + return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { + if (ISD::isNormalStore(N)) + return ExpandOp_NormalStore(N, OpNo); + + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + + MVT VT = N->getOperand(1).getValueType(); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + DebugLoc dl = N->getDebugLoc(); + SDValue Lo, Hi; + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + GetExpandedInteger(N->getValue(), Lo, Hi); + return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + N->getMemoryVT(), isVolatile, Alignment); + } else if (TLI.isLittleEndian()) { + // Little-endian - low bits are at low addresses. 
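+    // E.g. storing an expanded value to an i48 memory type with i32 parts:
+    // the low word goes out as a plain store at Ptr, and the remaining
+    // ExcessBits = 16 bits of the high word as a truncating store at Ptr+4.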
+ GetExpandedInteger(N->getValue(), Lo, Hi); + + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + isVolatile, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + MVT NEVT = MVT::getIntegerVT(ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, NEVT, + isVolatile, MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } else { + // Big-endian - high bits are at low addresses. Favor aligned stores at + // the cost of some bit-fiddling. + GetExpandedInteger(N->getValue(), Lo, Hi); + + MVT EVT = N->getMemoryVT(); + unsigned EBytes = EVT.getStoreSizeInBits()/8; + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + MVT HiVT = MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer high bits from the top of Lo to the bottom of Hi. + Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SRL, dl, NVT, Lo, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + } + + // Store both the high bits and maybe some of the low bits. + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), + SVOffset, HiVT, isVolatile, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Store the lowest ExcessBits bits in the second half. + Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, + MVT::getIntegerVT(ExcessBits), + isVolatile, MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } +} + +SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + // Just truncate the low part of the source. + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { + SDValue Op = N->getOperand(0); + MVT SrcVT = Op.getValueType(); + MVT DstVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ + // Do a signed conversion then adjust the result. + SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); + SignedConv = TLI.LowerOperation(SignedConv, DAG); + + // The result of the signed conversion needs adjusting if the 'sign bit' of + // the incoming integer was set. To handle this, we dynamically test to see + // if it is set, and, if so, add a fudge factor. + + const uint64_t F32TwoE32 = 0x4F800000ULL; + const uint64_t F32TwoE64 = 0x5F800000ULL; + const uint64_t F32TwoE128 = 0x7F800000ULL; + + APInt FF(32, 0); + if (SrcVT == MVT::i32) + FF = APInt(32, F32TwoE32); + else if (SrcVT == MVT::i64) + FF = APInt(32, F32TwoE64); + else if (SrcVT == MVT::i128) + FF = APInt(32, F32TwoE128); + else + assert(false && "Unsupported UINT_TO_FP!"); + + // Check whether the sign bit is set. 
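+    // If it was set, the signed conversion interpreted the input as negative
+    // and its result is low by exactly 2^SrcBits; the constants above are the
+    // f32 bit patterns used to add that power of two back in (2^32 for i32,
+    // 2^64 for i64, and the corresponding pattern for i128).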
+ SDValue Lo, Hi; + GetExpandedInteger(Op, Lo, Hi); + SDValue SignSet = DAG.getSetCC(dl, + TLI.getSetCCResultType(Hi.getValueType()), + Hi, DAG.getConstant(0, Hi.getValueType()), + ISD::SETLT); + + // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. + SDValue FudgePtr = DAG.getConstantPool(ConstantInt::get(FF.zext(64)), + TLI.getPointerTy()); + + // Get a pointer to FF if the sign bit was set, or to 0 otherwise. + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Four = DAG.getIntPtrConstant(4); + if (TLI.isBigEndian()) std::swap(Zero, Four); + SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, + Zero, Four); + unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); + FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); + Alignment = std::min(Alignment, 4u); + + // Load the value out, extending it from f32 to the destination float type. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + FudgePtr, NULL, 0, MVT::f32, + false, Alignment); + return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); + } + + // Otherwise, use a libcall. + RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this UINT_TO_FP!"); + return MakeLibCall(LC, DstVT, &Op, 1, true, dl); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp new file mode 100644 index 000000000000..00d71e1a4fe9 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -0,0 +1,1074 @@ +//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeTypes method. It transforms +// an arbitrary well-formed SelectionDAG to only consist of legal types. This +// is common code shared among the LegalizeTypes*.cpp files. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CallingConv.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +static cl::opt<bool> +EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); + +/// PerformExpensiveChecks - Do extensive, expensive, sanity checking. +void DAGTypeLegalizer::PerformExpensiveChecks() { + // If a node is not processed, then none of its values should be mapped by any + // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + + // If a node is processed, then each value with an illegal type must be mapped + // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + // Values with a legal type may be mapped by ReplacedValues, but not by any of + // the other maps. + + // Note that these invariants may not hold momentarily when processing a node: + // the node being processed may be put in a map before being marked Processed. + + // Note that it is possible to have nodes marked NewNode in the DAG. This can + // occur in two ways. Firstly, a node may be created during legalization but + // never passed to the legalization core. 
This is usually due to the implicit + // folding that occurs when using the DAG.getNode operators. Secondly, a new + // node may be passed to the legalization core, but when analyzed may morph + // into a different node, leaving the original node as a NewNode in the DAG. + // A node may morph if one of its operands changes during analysis. Whether + // it actually morphs or not depends on whether, after updating its operands, + // it is equivalent to an existing node: if so, it morphs into that existing + // node (CSE). An operand can change during analysis if the operand is a new + // node that morphs, or it is a processed value that was mapped to some other + // value (as recorded in ReplacedValues) in which case the operand is turned + // into that other value. If a node morphs then the node it morphed into will + // be used instead of it for legalization, however the original node continues + // to live on in the DAG. + // The conclusion is that though there may be nodes marked NewNode in the DAG, + // all uses of such nodes are also marked NewNode: the result is a fungus of + // NewNodes growing on top of the useful nodes, and perhaps using them, but + // not used by them. + + // If a value is mapped by ReplacedValues, then it must have no uses, except + // by nodes marked NewNode (see above). + + // The final node obtained by mapping by ReplacedValues is not marked NewNode. + // Note that ReplacedValues should be applied iteratively. + + // Note that the ReplacedValues map may also map deleted nodes. By iterating + // over the DAG we only consider non-deleted nodes. + SmallVector<SDNode*, 16> NewNodes; + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + // Remember nodes marked NewNode - they are subject to extra checking below. + if (I->getNodeId() == NewNode) + NewNodes.push_back(I); + + for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) { + SDValue Res(I, i); + bool Failed = false; + + unsigned Mapped = 0; + if (ReplacedValues.find(Res) != ReplacedValues.end()) { + Mapped |= 1; + // Check that remapped values are only used by nodes marked NewNode. + for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + + // Check that the final result of applying ReplacedValues is not + // marked NewNode. 
+ SDValue NewVal = ReplacedValues[Res]; + DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal); + while (I != ReplacedValues.end()) { + NewVal = I->second; + I = ReplacedValues.find(NewVal); + } + assert(NewVal.getNode()->getNodeId() != NewNode && + "ReplacedValues maps to a new node!"); + } + if (PromotedIntegers.find(Res) != PromotedIntegers.end()) + Mapped |= 2; + if (SoftenedFloats.find(Res) != SoftenedFloats.end()) + Mapped |= 4; + if (ScalarizedVectors.find(Res) != ScalarizedVectors.end()) + Mapped |= 8; + if (ExpandedIntegers.find(Res) != ExpandedIntegers.end()) + Mapped |= 16; + if (ExpandedFloats.find(Res) != ExpandedFloats.end()) + Mapped |= 32; + if (SplitVectors.find(Res) != SplitVectors.end()) + Mapped |= 64; + if (WidenedVectors.find(Res) != WidenedVectors.end()) + Mapped |= 128; + + if (I->getNodeId() != Processed) { + if (Mapped != 0) { + cerr << "Unprocessed value in a map!"; + Failed = true; + } + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { + if (Mapped > 1) { + cerr << "Value with legal type was transformed!"; + Failed = true; + } + } else { + if (Mapped == 0) { + cerr << "Processed value not in any map!"; + Failed = true; + } else if (Mapped & (Mapped - 1)) { + cerr << "Value in multiple maps!"; + Failed = true; + } + } + + if (Failed) { + if (Mapped & 1) + cerr << " ReplacedValues"; + if (Mapped & 2) + cerr << " PromotedIntegers"; + if (Mapped & 4) + cerr << " SoftenedFloats"; + if (Mapped & 8) + cerr << " ScalarizedVectors"; + if (Mapped & 16) + cerr << " ExpandedIntegers"; + if (Mapped & 32) + cerr << " ExpandedFloats"; + if (Mapped & 64) + cerr << " SplitVectors"; + if (Mapped & 128) + cerr << " WidenedVectors"; + cerr << "\n"; + abort(); + } + } + } + + // Checked that NewNodes are only used by other NewNodes. + for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) { + SDNode *N = NewNodes[i]; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) + assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!"); + } +} + +/// run - This is the main entry point for the type legalizer. This does a +/// top-down traversal of the dag, legalizing types as it goes. Returns "true" +/// if it made any changes. +bool DAGTypeLegalizer::run() { + bool Changed = false; + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + Dummy.setNodeId(Unanalyzed); + + // The root of the dag may dangle to deleted nodes until the type legalizer is + // done. Set it to null to avoid confusion. + DAG.setRoot(SDValue()); + + // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess' + // (and remembering them) if they are leaves and assigning 'Unanalyzed' if + // non-leaves. + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + if (I->getNumOperands() == 0) { + I->setNodeId(ReadyToProcess); + Worklist.push_back(I); + } else { + I->setNodeId(Unanalyzed); + } + } + + // Now that we have a set of nodes to process, handle them all. 
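+  // The NodeId of an unanalyzed node acts as a countdown of operands that are
+  // not yet legalized: for example, a node X = ADD(A, B) starts out
+  // Unanalyzed, drops to 1 once A is processed, and reaches 0
+  // (ReadyToProcess) once B is processed, at which point X joins the worklist.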
+ while (!Worklist.empty()) { +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + SDNode *N = Worklist.back(); + Worklist.pop_back(); + assert(N->getNodeId() == ReadyToProcess && + "Node should be ready if on worklist!"); + + if (IgnoreNodeResults(N)) + goto ScanOperands; + + // Scan the values produced by the node, checking to see if any result + // types are illegal. + for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { + MVT ResultVT = N->getValueType(i); + switch (getTypeAction(ResultVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + break; + // The following calls must take care of *all* of the node's results, + // not just the illegal result they were passed (this includes results + // with a legal type). Results can be remapped using ReplaceValueWith, + // or their promoted/expanded/etc values registered in PromotedIntegers, + // ExpandedIntegers etc. + case PromoteInteger: + PromoteIntegerResult(N, i); + Changed = true; + goto NodeDone; + case ExpandInteger: + ExpandIntegerResult(N, i); + Changed = true; + goto NodeDone; + case SoftenFloat: + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; + case ExpandFloat: + ExpandFloatResult(N, i); + Changed = true; + goto NodeDone; + case ScalarizeVector: + ScalarizeVectorResult(N, i); + Changed = true; + goto NodeDone; + case SplitVector: + SplitVectorResult(N, i); + Changed = true; + goto NodeDone; + case WidenVector: + WidenVectorResult(N, i); + Changed = true; + goto NodeDone; + } + } + +ScanOperands: + // Scan the operand list for the node, handling any nodes with operands that + // are illegal. + { + unsigned NumOperands = N->getNumOperands(); + bool NeedsReanalyzing = false; + unsigned i; + for (i = 0; i != NumOperands; ++i) { + if (IgnoreNodeResults(N->getOperand(i).getNode())) + continue; + + MVT OpVT = N->getOperand(i).getValueType(); + switch (getTypeAction(OpVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + continue; + // The following calls must either replace all of the node's results + // using ReplaceValueWith, and return "false"; or update the node's + // operands in place, and return "true". + case PromoteInteger: + NeedsReanalyzing = PromoteIntegerOperand(N, i); + Changed = true; + break; + case ExpandInteger: + NeedsReanalyzing = ExpandIntegerOperand(N, i); + Changed = true; + break; + case SoftenFloat: + NeedsReanalyzing = SoftenFloatOperand(N, i); + Changed = true; + break; + case ExpandFloat: + NeedsReanalyzing = ExpandFloatOperand(N, i); + Changed = true; + break; + case ScalarizeVector: + NeedsReanalyzing = ScalarizeVectorOperand(N, i); + Changed = true; + break; + case SplitVector: + NeedsReanalyzing = SplitVectorOperand(N, i); + Changed = true; + break; + case WidenVector: + NeedsReanalyzing = WidenVectorOperand(N, i); + Changed = true; + break; + } + break; + } + + // The sub-method updated N in place. Check to see if any operands are new, + // and if so, mark them. If the node needs revisiting, don't add all users + // to the worklist etc. + if (NeedsReanalyzing) { + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(NewNode); + // Recompute the NodeId and correct processed operands, adding the node to + // the worklist if ready. + SDNode *M = AnalyzeNewNode(N); + if (M == N) + // The node didn't morph - nothing special to do, it will be revisited. 
+ continue; + + // The node morphed - this is equivalent to legalizing by replacing every + // value of N with the corresponding value of M. So do that now. However + // there is no need to remember the replacement - morphing will make sure + // it is never used non-trivially. + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + // Replacing the value takes care of remapping the new value. Do the + // replacement without recording it in ReplacedValues. This does not + // expunge From but that is fine - it is not really a new node. + ReplaceValueWithHelper(SDValue(N, i), SDValue(M, i)); + assert(N->getNodeId() == NewNode && "Unexpected node state!"); + // The node continues to live on as part of the NewNode fungus that + // grows on top of the useful nodes. Nothing more needs to be done + // with it - move on to the next node. + continue; + } + + if (i == NumOperands) { + DEBUG(cerr << "Legally typed node: "; N->dump(&DAG); cerr << "\n"); + } + } +NodeDone: + + // If we reach here, the node was processed, potentially creating new nodes. + // Mark it as processed and add its users to the worklist as appropriate. + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(Processed); + + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + int NodeId = User->getNodeId(); + + // This node has two options: it can either be a new node or its Node ID + // may be a count of the number of operands it has that are not ready. + if (NodeId > 0) { + User->setNodeId(NodeId-1); + + // If this was the last use it was waiting on, add it to the ready list. + if (NodeId-1 == ReadyToProcess) + Worklist.push_back(User); + continue; + } + + // If this is an unreachable new node, then ignore it. If it ever becomes + // reachable by being used by a newly created node then it will be handled + // by AnalyzeNewNode. + if (NodeId == NewNode) + continue; + + // Otherwise, this node is new: this is the first operand of it that + // became ready. Its new NodeId is the number of operands it has minus 1 + // (as this node is now processed). + assert(NodeId == Unanalyzed && "Unknown node ID!"); + User->setNodeId(User->getNumOperands() - 1); + + // If the node only has a single operand, it is now ready. + if (User->getNumOperands() == 1) + Worklist.push_back(User); + } + } + +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + // If the root changed (e.g. it was a dead load) update the root. + DAG.setRoot(Dummy.getValue()); + + // Remove dead nodes. This is important to do for cleanliness but also before + // the checking loop below. Implicit folding by the DAG.getNode operators and + // node morphing can cause unreachable nodes to be around with their flags set + // to new. + DAG.RemoveDeadNodes(); + + // In a debug build, scan all the nodes to make sure we found them all. This + // ensures that there are no cycles and that everything got processed. +#ifndef NDEBUG + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + bool Failed = false; + + // Check that all result types are legal. + if (!IgnoreNodeResults(I)) + for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(I->getValueType(i))) { + cerr << "Result type " << i << " illegal!\n"; + Failed = true; + } + + // Check that all operand types are legal. 
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(I->getOperand(i).getNode()) && + !isTypeLegal(I->getOperand(i).getValueType())) { + cerr << "Operand type " << i << " illegal!\n"; + Failed = true; + } + + if (I->getNodeId() != Processed) { + if (I->getNodeId() == NewNode) + cerr << "New node not analyzed?\n"; + else if (I->getNodeId() == Unanalyzed) + cerr << "Unanalyzed node not noticed?\n"; + else if (I->getNodeId() > 0) + cerr << "Operand not processed?\n"; + else if (I->getNodeId() == ReadyToProcess) + cerr << "Not added to worklist?\n"; + Failed = true; + } + + if (Failed) { + I->dump(&DAG); cerr << "\n"; + abort(); + } + } +#endif + + return Changed; +} + +/// AnalyzeNewNode - The specified node is the root of a subtree of potentially +/// new nodes. Correct any processed operands (this may change the node) and +/// calculate the NodeId. If the node itself changes to a processed node, it +/// is not remapped - the caller needs to take care of this. +/// Returns the potentially changed node. +SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { + // If this was an existing node that is already done, we're done. + if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) + return N; + + // Remove any stale map entries. + ExpungeNode(N); + + // Okay, we know that this node is new. Recursively walk all of its operands + // to see if they are new also. The depth of this walk is bounded by the size + // of the new tree that was constructed (usually 2-3 nodes), so we don't worry + // about revisiting of nodes. + // + // As we walk the operands, keep track of the number of nodes that are + // processed. If non-zero, this will become the new nodeid of this node. + // Operands may morph when they are analyzed. If so, the node will be + // updated after all operands have been analyzed. Since this is rare, + // the code tries to minimize overhead in the non-morphing case. + + SmallVector<SDValue, 8> NewOps; + unsigned NumProcessed = 0; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue OrigOp = N->getOperand(i); + SDValue Op = OrigOp; + + AnalyzeNewValue(Op); // Op may morph. + + if (Op.getNode()->getNodeId() == Processed) + ++NumProcessed; + + if (!NewOps.empty()) { + // Some previous operand changed. Add this one to the list. + NewOps.push_back(Op); + } else if (Op != OrigOp) { + // This is the first operand to change - add all operands so far. + for (unsigned j = 0; j < i; ++j) + NewOps.push_back(N->getOperand(j)); + NewOps.push_back(Op); + } + } + + // Some operands changed - update the node. + if (!NewOps.empty()) { + SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], + NewOps.size()).getNode(); + if (M != N) { + // The node morphed into a different node. Normally for this to happen + // the original node would have to be marked NewNode. However this can + // in theory momentarily not be the case while ReplaceValueWith is doing + // its stuff. Mark the original node NewNode to help sanity checking. + N->setNodeId(NewNode); + if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed) + // It morphed into a previously analyzed node - nothing more to do. + return M; + + // It morphed into a different new node. Do the equivalent of passing + // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need + // to remap the operands, since they are the same as the operands we + // remapped above. + N = M; + ExpungeNode(N); + } + } + + // Calculate the NodeId. 
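+  // The id becomes the number of operands that are still awaiting
+  // legalization; if that count is zero the node is ReadyToProcess and can be
+  // queued immediately.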
+ N->setNodeId(N->getNumOperands() - NumProcessed); + if (N->getNodeId() == ReadyToProcess) + Worklist.push_back(N); + + return N; +} + +/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed. +/// If the node changes to a processed node, then remap it. +void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { + Val.setNode(AnalyzeNewNode(Val.getNode())); + if (Val.getNode()->getNodeId() == Processed) + // We were passed a processed node, or it morphed into one - remap it. + RemapValue(Val); +} + +/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it. +/// This can occur when a node is deleted then reallocated as a new node - +/// the mapping in ReplacedValues applies to the deleted node, not the new +/// one. +/// The only map that can have a deleted node as a source is ReplacedValues. +/// Other maps can have deleted nodes as targets, but since their looked-up +/// values are always immediately remapped using RemapValue, resulting in a +/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue +/// always performs correct mappings. In order to keep the mapping correct, +/// ExpungeNode should be called on any new nodes *before* adding them as +/// either source or target to ReplacedValues (which typically means calling +/// Expunge when a new node is first seen, since it may no longer be marked +/// NewNode by the time it is added to ReplacedValues). +void DAGTypeLegalizer::ExpungeNode(SDNode *N) { + if (N->getNodeId() != NewNode) + return; + + // If N is not remapped by ReplacedValues then there is nothing to do. + unsigned i, e; + for (i = 0, e = N->getNumValues(); i != e; ++i) + if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end()) + break; + + if (i == e) + return; + + // Remove N from all maps - this is expensive but rare. 
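+  // The loops below remap the target value of every map entry, so that no
+  // entry is left pointing at one of N's values, and then erase N's values
+  // from ReplacedValues itself.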
+ + for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(), + E = PromotedIntegers.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(), + E = SoftenedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(), + E = ScalarizedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(), + E = WidenedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){ + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(), + E = ReplacedValues.end(); I != E; ++I) + RemapValue(I->second); + + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + ReplacedValues.erase(SDValue(N, i)); +} + +/// RemapValue - If the specified value was already legalized to another value, +/// replace it by that value. +void DAGTypeLegalizer::RemapValue(SDValue &N) { + DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); + if (I != ReplacedValues.end()) { + // Use path compression to speed up future lookups if values get multiply + // replaced with other values. + RemapValue(I->second); + N = I->second; + assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!"); + } +} + +namespace { + /// NodeUpdateListener - This class is a DAGUpdateListener that listens for + /// updates to nodes and recomputes their ready state. + class VISIBILITY_HIDDEN NodeUpdateListener : + public SelectionDAG::DAGUpdateListener { + DAGTypeLegalizer &DTL; + SmallSetVector<SDNode*, 16> &NodesToAnalyze; + public: + explicit NodeUpdateListener(DAGTypeLegalizer &dtl, + SmallSetVector<SDNode*, 16> &nta) + : DTL(dtl), NodesToAnalyze(nta) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && + N->getNodeId() != DAGTypeLegalizer::Processed && + "Invalid node ID for RAUW deletion!"); + // It is possible, though rare, for the deleted node N to occur as a + // target in a map, so note the replacement N -> E in ReplacedValues. + assert(E && "Node not replaced?"); + DTL.NoteDeletion(N, E); + + // In theory the deleted node could also have been scheduled for analysis. + // So remove it from the set of nodes which will be analyzed. + NodesToAnalyze.remove(N); + + // In general nothing needs to be done for E, since it didn't change but + // only gained new uses. However N -> E was just added to ReplacedValues, + // and the result of a ReplacedValues mapping is not allowed to be marked + // NewNode. So if E is marked NewNode, then it needs to be analyzed. 
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode) + NodesToAnalyze.insert(E); + } + + virtual void NodeUpdated(SDNode *N) { + // Node updates can mean pretty much anything. It is possible that an + // operand was set to something already processed (f.e.) in which case + // this node could become ready. Recompute its flags. + assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && + N->getNodeId() != DAGTypeLegalizer::Processed && + "Invalid node ID for RAUW deletion!"); + N->setNodeId(DAGTypeLegalizer::NewNode); + NodesToAnalyze.insert(N); + } + }; +} + + +/// ReplaceValueWithHelper - Internal helper for ReplaceValueWith. Updates the +/// DAG causing any uses of From to use To instead, but without expunging From +/// or recording the replacement in ReplacedValues. Do not call directly unless +/// you really know what you are doing! +void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) { + assert(From.getNode() != To.getNode() && "Potential legalization loop!"); + + // If expansion produced new nodes, make sure they are properly marked. + AnalyzeNewValue(To); // Expunges To. + + // Anything that used the old node should now use the new one. Note that this + // can potentially cause recursive merging. + SmallSetVector<SDNode*, 16> NodesToAnalyze; + NodeUpdateListener NUL(*this, NodesToAnalyze); + DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); + + // Process the list of nodes that need to be reanalyzed. + while (!NodesToAnalyze.empty()) { + SDNode *N = NodesToAnalyze.back(); + NodesToAnalyze.pop_back(); + if (N->getNodeId() != DAGTypeLegalizer::NewNode) + // The node was analyzed while reanalyzing an earlier node - it is safe to + // skip. Note that this is not a morphing node - otherwise it would still + // be marked NewNode. + continue; + + // Analyze the node's operands and recalculate the node ID. + SDNode *M = AnalyzeNewNode(N); + if (M != N) { + // The node morphed into a different node. Make everyone use the new node + // instead. + assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + SDValue OldVal(N, i); + SDValue NewVal(M, i); + if (M->getNodeId() == Processed) + RemapValue(NewVal); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + } + // The original node continues to exist in the DAG, marked NewNode. + } + } +} + +/// ReplaceValueWith - The specified value was legalized to the specified other +/// value. Update the DAG and NodeIds replacing any uses of From to use To +/// instead. +void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { + assert(From.getNode()->getNodeId() == ReadyToProcess && + "Only the node being processed may be remapped!"); + + // If expansion produced new nodes, make sure they are properly marked. + ExpungeNode(From.getNode()); + AnalyzeNewValue(To); // Expunges To. + + // The old node may still be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + ReplacedValues[From] = To; + + // Do the replacement. 
+ ReplaceValueWithHelper(From, To); +} + +void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = PromotedIntegers[Op]; + assert(OpEntry.getNode() == 0 && "Node is already promoted!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = SoftenedFloats[Op]; + assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = ScalarizedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, + SDValue Hi) { + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + assert(Entry.first.getNode() == 0 && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, + SDValue Hi) { + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + assert(Entry.first.getNode() == 0 && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't split"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, + SDValue Hi) { + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + assert(Entry.first.getNode() == 0 && "Node already split"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = WidenedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node already widened!"); + OpEntry = Result; +} + + +//===----------------------------------------------------------------------===// +// Utilities. +//===----------------------------------------------------------------------===// + +/// BitConvertToInteger - Convert to an integer of the same size. 
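+/// For example, an f32 value is reinterpreted as the i32 with the same bit
+/// pattern, and a v2f32 as an i64.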
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { + unsigned BitWidth = Op.getValueType().getSizeInBits(); + return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + MVT::getIntegerVT(BitWidth), Op); +} + +/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the +/// same size. +SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { + assert(Op.getValueType().isVector() && "Only applies to vectors!"); + unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); + MVT EltNVT = MVT::getIntegerVT(EltWidth); + unsigned NumElts = Op.getValueType().getVectorNumElements(); + return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + MVT::getVectorVT(EltNVT, NumElts), Op); +} + +SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, + MVT DestVT) { + DebugLoc dl = Op.getDebugLoc(); + // Create the stack frame object. Make sure it is aligned for both + // the source and destination types. + SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0); + // Result is a load from the stack slot. + return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0); +} + +/// CustomLowerNode - Replace the node's results with custom code provided +/// by the target and return "true", or do nothing and return "false". +/// The last parameter is FALSE if we are dealing with a node with legal +/// result types and illegal operand. The second parameter denotes the type of +/// illegal OperandNo in that case. +/// The last parameter being TRUE means we are dealing with a +/// node with illegal result types. The second parameter denotes the type of +/// illegal ResNo in that case. +bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) { + // See if the target wants to custom lower this node. + if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) + return false; + + SmallVector<SDValue, 8> Results; + if (LegalizeResult) + TLI.ReplaceNodeResults(N, Results, DAG); + else + TLI.LowerOperationWrapper(N, Results, DAG); + + if (Results.empty()) + // The target didn't want to custom lower it after all. + return false; + + // Make everything that once used N's values now use those in Results instead. + assert(Results.size() == N->getNumValues() && + "Custom lowering returned the wrong number of results!"); + for (unsigned i = 0, e = Results.size(); i != e; ++i) + ReplaceValueWith(SDValue(N, i), Results[i]); + return true; +} + +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split into two not necessarily identical pieces. +void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) { + if (!InVT.isVector()) { + LoVT = HiVT = TLI.getTypeToTransformTo(InVT); + } else { + MVT NewEltVT = InVT.getVectorElementType(); + unsigned NumElements = InVT.getVectorNumElements(); + if ((NumElements & (NumElements-1)) == 0) { // Simple power of two vector. + NumElements >>= 1; + LoVT = HiVT = MVT::getVectorVT(NewEltVT, NumElements); + } else { // Non-power-of-two vectors. + unsigned NewNumElts_Lo = 1 << Log2_32(NumElements); + unsigned NewNumElts_Hi = NumElements - NewNumElts_Lo; + LoVT = MVT::getVectorVT(NewEltVT, NewNumElts_Lo); + HiVT = MVT::getVectorVT(NewEltVT, NewNumElts_Hi); + } + } +} + +/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and +/// high parts of the given value. 
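+/// For example, if Pair is an i64 that the target transforms to i32, this
+/// returns its low and high i32 halves.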
+void DAGTypeLegalizer::GetPairElements(SDValue Pair, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Pair.getDebugLoc(); + MVT NVT = TLI.getTypeToTransformTo(Pair.getValueType()); + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(1)); +} + +SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, MVT EltVT, + SDValue Index) { + DebugLoc dl = Index.getDebugLoc(); + // Make sure the index type is big enough to compute in. + if (Index.getValueType().bitsGT(TLI.getPointerTy())) + Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); + else + Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EltSize, Index.getValueType())); + return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); +} + +/// JoinIntegers - Build an integer with low bits Lo and high bits Hi. +SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { + // Arbitrarily use dlHi for result DebugLoc + DebugLoc dlHi = Hi.getDebugLoc(); + DebugLoc dlLo = Lo.getDebugLoc(); + MVT LVT = Lo.getValueType(); + MVT HVT = Hi.getValueType(); + MVT NVT = MVT::getIntegerVT(LVT.getSizeInBits() + HVT.getSizeInBits()); + + Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); + Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, + DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); +} + +/// LibCallify - Convert the node into a libcall with the same prototype. +SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, + bool isSigned) { + unsigned NumOps = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + if (NumOps == 0) { + return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl); + } else if (NumOps == 1) { + SDValue Op = N->getOperand(0); + return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl); + } else if (NumOps == 2) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl); + } + SmallVector<SDValue, 8> Ops(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + Ops[i] = N->getOperand(i); + + return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl); +} + +/// MakeLibCall - Generate a libcall taking the given operands as arguments and +/// returning a result of type RetVT. +SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForMVT(); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForMVT(); + std::pair<SDValue,SDValue> CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, CallingConv::C, false, Callee, Args, DAG, dl); + return CallInfo.first; +} + +/// PromoteTargetBoolean - Promote the given target boolean to a target boolean +/// of the given type. 
A target boolean is an integer value, not necessarily of +/// type i1, the bits of which conform to getBooleanContents. +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) { + DebugLoc dl = Bool.getDebugLoc(); + ISD::NodeType ExtendCode; + switch (TLI.getBooleanContents()) { + default: + assert(false && "Unknown BooleanContent!"); + case TargetLowering::UndefinedBooleanContent: + // Extend to VT by adding rubbish bits. + ExtendCode = ISD::ANY_EXTEND; + break; + case TargetLowering::ZeroOrOneBooleanContent: + // Extend to VT by adding zero bits. + ExtendCode = ISD::ZERO_EXTEND; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: { + // Extend to VT by copying the sign bit. + ExtendCode = ISD::SIGN_EXTEND; + break; + } + } + return DAG.getNode(ExtendCode, dl, VT, Bool); +} + +/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT +/// bits in Hi. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + MVT LoVT, MVT HiVT, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Op.getDebugLoc(); + assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == + Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); + Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, + DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); +} + +/// SplitInteger - Return the lower and upper halves of Op's bits in a value +/// type half the size of Op's. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + SDValue &Lo, SDValue &Hi) { + MVT HalfVT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()/2); + SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Entry Point +//===----------------------------------------------------------------------===// + +/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that +/// only uses types natively supported by the target. Returns "true" if it made +/// any changes. +/// +/// Note that this is an involved process that may invalidate pointers into +/// the graph. +bool SelectionDAG::LegalizeTypes() { + return DAGTypeLegalizer(*this).run(); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h new file mode 100644 index 000000000000..75c89246a31e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -0,0 +1,736 @@ +//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DAGTypeLegalizer class. This is a private interface +// shared between the code that implements the SelectionDAG::LegalizeTypes +// method. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SELECTIONDAG_LEGALIZETYPES_H +#define SELECTIONDAG_LEGALIZETYPES_H + +#define DEBUG_TYPE "legalize-types" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks +/// on it until only value types the target machine can handle are left. This +/// involves promoting small sizes to large sizes or splitting up large values +/// into small values. +/// +class VISIBILITY_HIDDEN DAGTypeLegalizer { + TargetLowering &TLI; + SelectionDAG &DAG; +public: + // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information + // about the state of the node. The enum has all the values. + enum NodeIdFlags { + /// ReadyToProcess - All operands have been processed, so this node is ready + /// to be handled. + ReadyToProcess = 0, + + /// NewNode - This is a new node, not before seen, that was created in the + /// process of legalizing some other node. + NewNode = -1, + + /// Unanalyzed - This node's ID needs to be set to the number of its + /// unprocessed operands. + Unanalyzed = -2, + + /// Processed - This is a node that has already been processed. + Processed = -3 + + // 1+ - This is a node which has this many unprocessed operands. + }; +private: + enum LegalizeAction { + Legal, // The target natively supports this type. + PromoteInteger, // Replace this integer type with a larger one. + ExpandInteger, // Split this integer type into two of half the size. + SoftenFloat, // Convert this float type to a same size integer type. + ExpandFloat, // Split this float type into two of half the size. + ScalarizeVector, // Replace this one-element vector with its element type. + SplitVector, // This vector type should be split into smaller vectors. + WidenVector // This vector type should be widened into a larger vector. + }; + + /// ValueTypeActions - This is a bitvector that contains two bits for each + /// simple value type, where the two bits correspond to the LegalizeAction + /// enum from TargetLowering. This can be queried with "getTypeAction(VT)". + TargetLowering::ValueTypeActionImpl ValueTypeActions; + + /// getTypeAction - Return how we should legalize values of this type. + LegalizeAction getTypeAction(MVT VT) const { + switch (ValueTypeActions.getTypeAction(VT)) { + default: + assert(false && "Unknown legalize action!"); + case TargetLowering::Legal: + return Legal; + case TargetLowering::Promote: + // Promote can mean + // 1) For integers, use a larger integer type (e.g. i8 -> i32). + // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32). + if (!VT.isVector()) + return PromoteInteger; + else + return WidenVector; + case TargetLowering::Expand: + // Expand can mean + // 1) split scalar in half, 2) convert a float to an integer, + // 3) scalarize a single-element vector, 4) split a vector in two. 
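+      // For example: i64 may become two i32 halves, f64 may become a
+      // same-sized i64 on a target with no hardware floating point, v1i32
+      // becomes i32, and v8i32 may become two v4i32 halves.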
+ if (!VT.isVector()) { + if (VT.isInteger()) + return ExpandInteger; + else if (VT.getSizeInBits() == + TLI.getTypeToTransformTo(VT).getSizeInBits()) + return SoftenFloat; + else + return ExpandFloat; + } else if (VT.getVectorNumElements() == 1) { + return ScalarizeVector; + } else { + return SplitVector; + } + } + } + + /// isTypeLegal - Return true if this type is legal on this target. + bool isTypeLegal(MVT VT) const { + return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; + } + + /// IgnoreNodeResults - Pretend all of this node's results are legal. + bool IgnoreNodeResults(SDNode *N) const { + return N->getOpcode() == ISD::TargetConstant; + } + + /// PromotedIntegers - For integer nodes that are below legal width, this map + /// indicates what promoted value to use. + DenseMap<SDValue, SDValue> PromotedIntegers; + + /// ExpandedIntegers - For integer nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers; + + /// SoftenedFloats - For floating point nodes converted to integers of + /// the same size, this map indicates the converted value to use. + DenseMap<SDValue, SDValue> SoftenedFloats; + + /// ExpandedFloats - For float nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats; + + /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the + /// scalar value of type 'ty' to use. + DenseMap<SDValue, SDValue> ScalarizedVectors; + + /// SplitVectors - For nodes that need to be split this map indicates + /// which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors; + + /// WidenedVectors - For vector nodes that need to be widened, indicates + /// the widened value to use. + DenseMap<SDValue, SDValue> WidenedVectors; + + /// ReplacedValues - For values that have been replaced with another, + /// indicates the replacement value to use. + DenseMap<SDValue, SDValue> ReplacedValues; + + /// Worklist - This defines a worklist of nodes to process. In order to be + /// pushed onto this worklist, all operands of a node must have already been + /// processed. + SmallVector<SDNode*, 128> Worklist; + +public: + explicit DAGTypeLegalizer(SelectionDAG &dag) + : TLI(dag.getTargetLoweringInfo()), DAG(dag), + ValueTypeActions(TLI.getValueTypeActions()) { + assert(MVT::LAST_VALUETYPE <= 32 && + "Too many value types for ValueTypeActions to hold!"); + } + + /// run - This is the main entry point for the type legalizer. This does a + /// top-down traversal of the dag, legalizing types as it goes. Returns + /// "true" if it made any changes. + bool run(); + + void NoteDeletion(SDNode *Old, SDNode *New) { + ExpungeNode(Old); + ExpungeNode(New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + ReplacedValues[SDValue(Old, i)] = SDValue(New, i); + } + +private: + SDNode *AnalyzeNewNode(SDNode *N); + void AnalyzeNewValue(SDValue &Val); + void ExpungeNode(SDNode *N); + void PerformExpensiveChecks(); + void RemapValue(SDValue &N); + + // Common routines. 
+ SDValue BitConvertToInteger(SDValue Op); + SDValue BitConvertVectorToIntegerVector(SDValue Op); + SDValue CreateStackStoreLoad(SDValue Op, MVT DestVT); + bool CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult); + SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index); + SDValue JoinIntegers(SDValue Lo, SDValue Hi); + SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); + SDValue MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + const SDValue *Ops, unsigned NumOps, bool isSigned, + DebugLoc dl); + SDValue PromoteTargetBoolean(SDValue Bool, MVT VT); + void ReplaceValueWith(SDValue From, SDValue To); + void ReplaceValueWithHelper(SDValue From, SDValue To); + void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT, + SDValue &Lo, SDValue &Hi); + + //===--------------------------------------------------------------------===// + // Integer Promotion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetPromotedInteger - Given a processed operand Op which was promoted to a + /// larger integer type, this returns the promoted value. The low bits of the + /// promoted value corresponding to the original type are exactly equal to Op. + /// The extra bits contain rubbish, so the promoted value may need to be zero- + /// or sign-extended from the original type before it is usable (the helpers + /// SExtPromotedInteger and ZExtPromotedInteger can do this for you). + /// For example, if Op is an i16 and was promoted to an i32, then this method + /// returns an i32, the lower 16 bits of which coincide with Op, and the upper + /// 16 bits of which contain rubbish. + SDValue GetPromotedInteger(SDValue Op) { + SDValue &PromotedOp = PromotedIntegers[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedInteger(SDValue Op, SDValue Result); + + /// SExtPromotedInteger - Get a promoted operand and sign extend it to the + /// final size. + SDValue SExtPromotedInteger(SDValue Op) { + MVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + } + + /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the + /// final size. + SDValue ZExtPromotedInteger(SDValue Op) { + MVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getZeroExtendInReg(Op, dl, OldVT); + } + + // Integer Result Promotion. 
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_AssertSext(SDNode *N); + SDValue PromoteIntRes_AssertZext(SDNode *N); + SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); + SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_BIT_CONVERT(SDNode *N); + SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); + SDValue PromoteIntRes_Constant(SDNode *N); + SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntRes_CTLZ(SDNode *N); + SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTTZ(SDNode *N); + SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_INT_EXTEND(SDNode *N); + SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_Overflow(SDNode *N); + SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SDIV(SDNode *N); + SDValue PromoteIntRes_SELECT(SDNode *N); + SDValue PromoteIntRes_SELECT_CC(SDNode *N); + SDValue PromoteIntRes_SETCC(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N); + SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_TRUNCATE(SDNode *N); + SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_UDIV(SDNode *N); + SDValue PromoteIntRes_UNDEF(SDNode *N); + SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + + // Integer Operand Promotion. + bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); + SDValue PromoteIntOp_BIT_CONVERT(SDNode *N); + SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); + SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MEMBARRIER(SDNode *N); + SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); + SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_TRUNCATE(SDNode *N); + SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + + void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); + + //===--------------------------------------------------------------------===// + // Integer Expansion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedInteger - Given a processed operand Op which was expanded into + /// two integers of half the size, this returns the two halves. The low bits + /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi. + /// For example, if Op is an i64 which was expanded into two i32's, then this + /// method returns the two i32's, with Lo being equal to the lower 32 bits of + /// Op, and Hi being equal to the upper 32 bits. 
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi); + + // Integer Result Expansion. + void ExpandIntegerResult(SDNode *N, unsigned ResNo); + void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Integer Operand Expansion. + bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandIntOp_BIT_CONVERT(SDNode *N); + SDValue ExpandIntOp_BR_CC(SDNode *N); + SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); + SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue ExpandIntOp_SELECT_CC(SDNode *N); + SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_Shift(SDNode *N); + SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ExpandIntOp_TRUNCATE(SDNode *N); + SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + + void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float to Integer Conversion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op. 
+ SDValue GetSoftenedFloat(SDValue Op) { + SDValue &SoftenedOp = SoftenedFloats[Op]; + RemapValue(SoftenedOp); + assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); + return SoftenedOp; + } + void SetSoftenedFloat(SDValue Op, SDValue Result); + + // Result Float to Integer Conversion. + void SoftenFloatResult(SDNode *N, unsigned OpNo); + SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); + SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCEIL(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FDIV(SDNode *N); + SDValue SoftenFloatRes_FEXP(SDNode *N); + SDValue SoftenFloatRes_FEXP2(SDNode *N); + SDValue SoftenFloatRes_FFLOOR(SDNode *N); + SDValue SoftenFloatRes_FLOG(SDNode *N); + SDValue SoftenFloatRes_FLOG2(SDNode *N); + SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMUL(SDNode *N); + SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); + SDValue SoftenFloatRes_FP_ROUND(SDNode *N); + SDValue SoftenFloatRes_FPOW(SDNode *N); + SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREM(SDNode *N); + SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSQRT(SDNode *N); + SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTRUNC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_UNDEF(SDNode *N); + SDValue SoftenFloatRes_VAARG(SDNode *N); + SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + + // Operand Float to Integer Conversion. + bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_ROUND(SDNode *N); + SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_SELECT_CC(SDNode *N); + SDValue SoftenFloatOp_SETCC(SDNode *N); + SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + + void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float Expansion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedFloat - Given a processed operand Op which was expanded into + /// two floating point values of half the size, this returns the two halves. + /// The low bits of Op are exactly equal to the bits of Lo; the high bits + /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded + /// into two f64's, then this method returns the two f64's, with Lo being + /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits. + void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi); + + // Float Result Expansion. 
+ void ExpandFloatResult(SDNode *N, unsigned ResNo); + void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Float Operand Expansion. + bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FP_ROUND(SDNode *N); + SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_SELECT_CC(SDNode *N); + SDValue ExpandFloatOp_SETCC(SDNode *N); + SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); + + void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Scalarization Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetScalarizedVector - Given a processed one-element vector Op which was + /// scalarized to its element type, this returns the element. For example, + /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32. + SDValue GetScalarizedVector(SDValue Op) { + SDValue &ScalarizedOp = ScalarizedVectors[Op]; + RemapValue(ScalarizedOp); + assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); + return ScalarizedOp; + } + void SetScalarizedVector(SDValue Op, SDValue Result); + + // Vector Result Scalarization: <1 x ty> -> ty. 
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_ShiftOp(SDNode *N); + SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + + SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); + SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FPOWI(SDNode *N); + SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_SELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_UNDEF(SDNode *N); + SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_VSETCC(SDNode *N); + + // Vector Operand Scalarization: <1 x ty> -> ty. + bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); + SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Vector Splitting Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSplitVector - Given a processed vector Op which was split into smaller + /// vectors, this method returns the smaller vectors. The first elements of + /// Op coincide with the elements of Lo; the remaining elements of Op coincide + /// with the elements of Hi: Op is what you would get by concatenating Lo and + /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then + /// this method returns the two v4i32's, with Lo corresponding to the first 4 + /// elements of Op, and Hi to the last 4 elements. + void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. + void SplitVectorResult(SDNode *N, unsigned OpNo); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + + void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + SDValue &Hi); + void SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. 
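+ // SplitVectorOperand dispatches on the node's opcode to one of the
+ // SplitVecOp_* helpers below, which rewrite the node in terms of the two
+ // halves of its split operand.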
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+ SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetWidenedVector - Given a processed vector Op which was widened into a
+ /// larger vector, this method returns the larger vector. The elements of
+ /// the returned vector consist of the elements of Op followed by elements
+ /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+ /// v4i32, then this method returns a v4i32 for which the first two elements
+ /// are the same as those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ SDValue &WidenedOp = WidenedVectors[Op];
+ RemapValue(WidenedOp);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+ // Widen Vector Result Promotion.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VSETCC(SDNode* N);
+
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GenWidenVectorLoads - Helper function to generate a set of loads to
+ /// load a vector with a resulting wider type. It takes
+ /// LdChain: list of chains for the loads we have generated.
+ /// Chain: incoming chain for the load.
+ /// BasePtr: base pointer to load from.
+ /// SV: memory disambiguation source value.
+ /// SVOffset: memory disambiguation offset.
+ /// Alignment: alignment of the memory.
+ /// isVolatile: volatile load.
+ /// LdWidth: width of memory that we want to load.
+ /// ResType: the wider result type for the resulting vector.
+ /// dl: DebugLoc to be applied to new nodes.
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
+ SDValue BasePtr, const Value *SV,
+ int SVOffset, unsigned Alignment,
+ bool isVolatile, unsigned LdWidth,
+ MVT ResType, DebugLoc dl);
+
+ /// GenWidenVectorStores - Helper function to generate a set of stores to
+ /// store a widened vector into non-widened memory. It takes
+ /// StChain: list of chains for the stores we have generated.
+ /// Chain: incoming chain for the stores.
+ /// BasePtr: base pointer to store to.
+ /// SV: memory disambiguation source value.
+ /// SVOffset: memory disambiguation offset.
+ /// Alignment: alignment of the memory.
+ /// isVolatile: volatile store.
+ /// ValOp: value to store.
+ /// StWidth: width of memory that we want to store.
+ /// dl: DebugLoc to be applied to new nodes.
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain,
+ SDValue BasePtr, const Value *SV,
+ int SVOffset, unsigned Alignment,
+ bool isVolatile, SDValue ValOp,
+ unsigned StWidth, DebugLoc dl);
+
+ /// Modifies a vector input (widens or narrows) to a vector of WidenVT. The
+ /// input vector must have the same element type as WidenVT.
+ SDValue ModifyToType(SDValue InOp, MVT WidenVT);
+
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // not necessarily identical types. As such they can be used for splitting
+ // vectors and expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+ /// which is split (or expanded) into two not necessarily identical pieces.
+ void GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT);
+
+ /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+ /// high parts of the given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // identical types of half the size, and that the Lo/Hi part is stored first
+ // in memory on little/big-endian machines, followed by the Hi/Lo part. As
+ // such they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ // Generic Result Expansion.
+ void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi); + + // Generic Operand Expansion. + SDValue ExpandOp_BIT_CONVERT (SDNode *N); + SDValue ExpandOp_BUILD_VECTOR (SDNode *N); + SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); + SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); + SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N); + SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo); +}; + +} // end namespace llvm. + +#endif diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp new file mode 100644 index 000000000000..e8ff3fc9efb4 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -0,0 +1,453 @@ +//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements generic type expansion and splitting for LegalizeTypes. +// The routines here perform legalization when the details of the type (such as +// whether it is an integer or a float) do not matter. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. +// Splitting is the act of changing a computation in an illegal type to be a +// computation in two not necessarily identical registers of a smaller type. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Generic Result Expansion. +//===----------------------------------------------------------------------===// + +// These routines assume that the Lo/Hi part is stored first in memory on +// little/big-endian machines, followed by the Hi/Lo part. This means that +// they cannot be used as is on vectors, for which Lo is always stored first. + +void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT OutVT = N->getValueType(0); + MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + break; + case SoftenFloat: + // Convert the integer operand instead. + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case ExpandInteger: + case ExpandFloat: + // Convert the expanded pieces of the input. + GetExpandedOp(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case SplitVector: + // Convert the split parts of the input if it was split in two. 
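+ // This shortcut only applies when both halves have the same type;
+ // otherwise fall through to the generic stack store/load lowering below.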
+ GetSplitVector(InOp, Lo, Hi); + if (Lo.getValueType() == Hi.getValueType()) { + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + } + break; + case ScalarizeVector: + // Convert the element instead. + SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case WidenVector: { + assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); + InOp = GetWidenedVector(InOp); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + InVT.getVectorNumElements()/2); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + } + } + + // Lower the bit-convert to a store/load from the stack. + assert(NOutVT.isByteSized() && "Expanded type not byte sized!"); + + // Create the stack frame object. Make sure it is aligned for both + // the source and expanded destination types. + unsigned Alignment = + TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT()); + SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0); + + // Load the first half from the stack slot. + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0); + + // Increment the pointer to the other half. + unsigned IncrementSize = NOutVT.getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the second half from the stack slot. + Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, + MinAlign(Alignment, IncrementSize)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Return the operands. + Lo = N->getOperand(0); + Hi = N->getOperand(1); +} + +void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + GetExpandedOp(N->getOperand(0), Lo, Hi); + SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? + Hi : Lo; + + assert(Part.getValueType() == N->getValueType(0) && + "Type twice as big as expanded type not itself expanded!"); + + GetPairElements(Part, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue OldVec = N->getOperand(0); + unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + // Convert to a vector of the expanded element type, for example + // <3 x i64> -> <6 x i32>. + MVT OldVT = N->getValueType(0); + MVT NewVT = TLI.getTypeToTransformTo(OldVT); + + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::getVectorVT(NewVT, 2*OldElts), + OldVec); + + // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. 
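+ // For example, extracting element 1 of a <3 x i64> becomes extracting
+ // elements 2 and 3 of the <6 x i32>, giving the Lo and Hi halves
+ // (swapped on big-endian targets).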
+ SDValue Idx = N->getOperand(1); + + // Make sure the type of Idx is big enough to hold the new values. + if (Idx.getValueType().bitsLT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + DAG.getConstant(1, Idx.getValueType())); + Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isNormalLoad(N) && "This routine only for normal loads!"); + DebugLoc dl = N->getDebugLoc(); + + LoadSDNode *LD = cast<LoadSDNode>(N); + MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, + isVolatile, Alignment); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits() / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset+IncrementSize, + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + +void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + + +//===--------------------------------------------------------------------===// +// Generic Operand Expansion. +//===--------------------------------------------------------------------===// + +SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0).isVector()) { + // An illegal expanding type is being converted to a legal vector type. + // Make a two element vector out of the expanded parts and convert that + // instead, but only if the new vector type is legal (otherwise there + // is no point, and it might create expansion loops). For example, on + // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. 
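+ // The expanded Lo/Hi parts become the two elements of the new vector
+ // (in the opposite order on big-endian targets), and the result is then
+ // bit-converted back to the original result type.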
+ MVT OVT = N->getOperand(0).getValueType(); + MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OVT), 2); + + if (isTypeLegal(NVT)) { + SDValue Parts[2]; + GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]); + + if (TLI.isBigEndian()) + std::swap(Parts[0], Parts[1]); + + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); + return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec); + } + } + + // Otherwise, store to a temporary and load out again as the new type. + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type needs expansion. + MVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + MVT OldVT = N->getOperand(0).getValueType(); + MVT NewVT = TLI.getTypeToTransformTo(OldVT); + DebugLoc dl = N->getDebugLoc(); + + assert(OldVT == VecVT.getVectorElementType() && + "BUILD_VECTOR operand type doesn't match vector element type!"); + + // Build a vector of twice the length out of the expanded elements. + // For example <3 x i64> -> <6 x i32>. + std::vector<SDValue> NewElts; + NewElts.reserve(NumElts*2); + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(i), Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + NewElts.push_back(Lo); + NewElts.push_back(Hi); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::getVectorVT(NewVT, NewElts.size()), + &NewElts[0], NewElts.size()); + + // Convert the new vector to the old vector type. + return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(0), Lo, Hi); + return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo; +} + +SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { + // The vector type is legal but the element type needs expansion. + MVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Val = N->getOperand(1); + MVT OldEVT = Val.getValueType(); + MVT NewEVT = TLI.getTypeToTransformTo(OldEVT); + + assert(OldEVT == VecVT.getVectorElementType() && + "Inserted element type doesn't match vector element type!"); + + // Bitconvert to a vector of twice the length with elements of the expanded + // type, insert the expanded vector elements, and then convert back. + MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2); + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + NewVecVT, N->getOperand(0)); + + SDValue Lo, Hi; + GetExpandedOp(Val, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + SDValue Idx = N->getOperand(2); + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); + Idx = DAG.getNode(ISD::ADD, dl, + Idx.getValueType(), Idx, DAG.getIntPtrConstant(1)); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); + + // Convert the new vector to the old vector type. 
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + MVT VT = N->getValueType(0); + assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && + "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + Ops[0] = N->getOperand(0); + SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); + for (unsigned i = 1; i < NumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { + assert(ISD::isNormalStore(N) && "This routine only for normal stores!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + DebugLoc dl = N->getDebugLoc(); + + StoreSDNode *St = cast<StoreSDNode>(N); + MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType()); + SDValue Chain = St->getChain(); + SDValue Ptr = St->getBasePtr(); + int SVOffset = St->getSrcValueOffset(); + unsigned Alignment = St->getAlignment(); + bool isVolatile = St->isVolatile(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + unsigned IncrementSize = NVT.getSizeInBits() / 8; + + SDValue Lo, Hi; + GetExpandedOp(St->getValue(), Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, + isVolatile, Alignment); + + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); + Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), + SVOffset + IncrementSize, + isVolatile, MinAlign(Alignment, IncrementSize)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===--------------------------------------------------------------------===// +// Generic Result Splitting. +//===--------------------------------------------------------------------===// + +// Be careful to make no assumptions about which of Lo/Hi is stored first in +// memory (for vectors it is always Lo first followed by Hi in the following +// bytes; for integers and floats it is Lo first if and only if the machine is +// little-endian). + +void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // A MERGE_VALUES node can produce any number of values. We know that the + // first illegal one needs to be expanded into Lo/Hi. + unsigned i; + + // The string of legal results gets turned into input operands, which have + // the same type. + for (i = 0; isTypeLegal(N->getValueType(i)); ++i) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); + + // The first illegal result must be the one that needs to be expanded. + GetSplitOp(N->getOperand(i), Lo, Hi); + + // Legalize the rest of the results into the input operands whether they are + // legal or not. 
+ unsigned e = N->getNumValues(); + for (++i; i != e; ++i) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); +} + +void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(1), LL, LH); + GetSplitOp(N->getOperand(2), RL, RH); + + SDValue Cond = N->getOperand(0); + Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL); + Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH); +} + +void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(2), LL, LH); + GetSplitOp(N->getOperand(3), RL, RH); + + Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0), + N->getOperand(1), LL, RL, N->getOperand(4)); + Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0), + N->getOperand(1), LH, RH, N->getOperand(4)); +} + +void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getUNDEF(LoVT); + Hi = DAG.getUNDEF(HiVT); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp new file mode 100644 index 000000000000..df9af2147ca5 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -0,0 +1,335 @@ +//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeVectors method. +// +// The vector legalizer looks for vector operations which might need to be +// scalarized and legalizes them. This is a separate step from Legalize because +// scalarizing can introduce illegal types. For example, suppose we have an +// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition +// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the +// operation, which introduces nodes with the illegal type i64 which must be +// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; +// the operation must be unrolled, which introduces nodes with the illegal +// type i8 which must be promoted. +// +// This does not legalize vector manipulations like ISD::BUILD_VECTOR, +// or operations that happen to take a vector which are custom-lowered like +// ISD::CALL; the legalization for such operations never produces nodes +// with illegal types, so it's okay to put off legalizing them until +// SelectionDAG::Legalize runs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +namespace { +class VectorLegalizer { + SelectionDAG& DAG; + TargetLowering& TLI; + bool Changed; // Keep track of whether anything changed + + /// LegalizedNodes - For nodes that are of legal width, and that have more + /// than one use, this map indicates what regularized operand to use. This + /// allows us to avoid legalizing the same thing more than once. 
+ DenseMap<SDValue, SDValue> LegalizedNodes; + + // Adds a node to the translation cache + void AddLegalizedOperand(SDValue From, SDValue To) { + LegalizedNodes.insert(std::make_pair(From, To)); + // If someone requests legalization of the new node, return itself. + if (From != To) + LegalizedNodes.insert(std::make_pair(To, To)); + } + + // Legalizes the given node + SDValue LegalizeOp(SDValue Op); + // Assuming the node is legal, "legalize" the results + SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); + // Implements unrolling a generic vector operation, i.e. turning it into + // scalar operations. + SDValue UnrollVectorOp(SDValue Op); + // Implements unrolling a VSETCC. + SDValue UnrollVSETCC(SDValue Op); + // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB + // isn't legal. + SDValue ExpandFNEG(SDValue Op); + // Implements vector promotion; this is essentially just bitcasting the + // operands to a different type and bitcasting the result back to the + // original type. + SDValue PromoteVectorOp(SDValue Op); + + public: + bool Run(); + VectorLegalizer(SelectionDAG& dag) : + DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} +}; + +bool VectorLegalizer::Run() { + // The legalize process is inherently a bottom-up recursive process (users + // legalize their uses before themselves). Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != next(E); ++I) + LegalizeOp(SDValue(I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); + + return Changed; +} + +SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { + // Generic legalization: just pass the operand through. + for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) + AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); + return Result.getValue(Op.getResNo()); +} + +SDValue VectorLegalizer::LegalizeOp(SDValue Op) { + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. 
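+ // If this node was already legalized, reuse the cached result.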
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDNode* Node = Op.getNode(); + + // Legalize the operands + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + + SDValue Result = + DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size()); + + bool HasVectorValue = false; + for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); + J != E; + ++J) + HasVectorValue |= J->isVector(); + if (!HasVectorValue) + return TranslateLegalizeResults(Op, Result); + + switch (Op.getOpcode()) { + default: + return TranslateLegalizeResults(Op, Result); + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::SELECT: + case ISD::SELECT_CC: + case ISD::VSETCC: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FNEG: + case ISD::FABS: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + case ISD::FPOWI: + case ISD::FPOW: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FFLOOR: + break; + } + + switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + case TargetLowering::Promote: + // "Promote" the operation by bitcasting + Result = PromoteVectorOp(Op); + Changed = true; + break; + case TargetLowering::Legal: break; + case TargetLowering::Custom: { + SDValue Tmp1 = TLI.LowerOperation(Op, DAG); + if (Tmp1.getNode()) { + Result = Tmp1; + break; + } + // FALL THROUGH + } + case TargetLowering::Expand: + if (Node->getOpcode() == ISD::FNEG) + Result = ExpandFNEG(Op); + else if (Node->getOpcode() == ISD::VSETCC) + Result = UnrollVSETCC(Op); + else + Result = UnrollVectorOp(Op); + break; + } + + // Make sure that the generated code is itself legal. + if (Result != Op) { + Result = LegalizeOp(Result); + Changed = true; + } + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; +} + +SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { + // Vector "promotion" is basically just bitcasting and doing the operation + // in a different type. For example, x86 promotes ISD::AND on v2i32 to + // v1i64. 
+ MVT VT = Op.getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); + + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); +} + +SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { + if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { + SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + Zero, Op.getOperand(0)); + } + return UnrollVectorOp(Op); +} + +SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { + MVT VT = Op.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType(); + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); + MVT TmpEltVT = LHS.getValueType().getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 8> Ops(NumElems); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getIntPtrConstant(i)); + SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getIntPtrConstant(i)); + Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT), + LHSElem, RHSElem, CC); + Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), EltVT), + DAG.getConstant(0, EltVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); +} + +/// UnrollVectorOp - We know that the given vector has a legal type, however +/// the operation it performs is not legal, and the target has requested that +/// the operation be expanded. "Unroll" the vector, splitting out the scalars +/// and operating on each element individually. +SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) { + MVT VT = Op.getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't unroll a vector with multiple results!"); + unsigned NE = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + + SmallVector<SDValue, 8> Scalars; + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + for (unsigned i = 0; i != NE; ++i) { + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + SDValue Operand = Op.getOperand(j); + MVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + // A vector operand; extract a single element. + MVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperandEltVT, + Operand, + DAG.getConstant(i, MVT::i32)); + } else { + // A scalar operand; just use it as is. 
+ Operands[j] = Operand; + } + } + + switch (Op.getOpcode()) { + default: + Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, + &Operands[0], Operands.size())); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, Operands[0], + DAG.getShiftAmountOperand(Operands[1]))); + break; + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Scalars[0], Scalars.size()); +} + +} + +bool SelectionDAG::LegalizeVectors() { + return VectorLegalizer(*this).Run(); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp new file mode 100644 index 000000000000..68967cc638fd --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -0,0 +1,2151 @@ +//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file performs vector type splitting and scalarization for LegalizeTypes. +// Scalarization is the act of changing a computation in an illegal one-element +// vector type to be a computation in its scalar element type. For example, +// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed +// as a base case when scalarizing vector arithmetic like <4 x f32>, which +// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 +// types. +// Splitting is the act of changing a computation in an invalid vector type to +// be a computation in multiple vectors of a smaller type. For example, +// implementing <128 x f32> operations in terms of two <64 x f32> operations. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Result Vector Scalarization: <1 x ty> -> ty. 
+//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to scalarize the result of this operator!"); + abort(); + + case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: R = N->getOperand(0); break; + case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; + case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; + + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCOS: + case ISD::FNEG: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: R = ScalarizeVecRes_UnaryOp(N); break; + + case ISD::ADD: + case ISD::AND: + case ISD::FADD: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::SUB: + case ISD::UDIV: + case ISD::UREM: + case ISD::XOR: R = ScalarizeVecRes_BinOp(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: R = ScalarizeVecRes_ShiftOp(N); break; + } + + // If R is null, the sub-method took care of registering the result. 
+ if (R.getNode()) + SetScalarizedVector(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_ShiftOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue ShiftAmt = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, ShiftAmt); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { + MVT NewVT = N->getValueType(0).getVectorElementType(); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + NewVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { + MVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), + Op0, DAG.getValueType(NewVT), + DAG.getValueType(Op0.getValueType()), + N->getOperand(3), + N->getOperand(4), + cast<CvtRndSatSDNode>(N)->getCvtCode()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + N->getValueType(0).getVectorElementType(), + N->getOperand(0), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { + // The value to insert may have a wider type than the vector element type, + // so be sure to truncate it to the element type if necessary. + SDValue Op = N->getOperand(1); + MVT EltVT = N->getValueType(0).getVectorElementType(); + if (Op.getValueType() != EltVT) + // FIXME: Can this happen for floating point types? + Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); + return Op; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { + assert(N->isUnindexed() && "Indexed vector load?"); + + SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(), + N->getExtensionType(), + N->getValueType(0).getVectorElementType(), + N->getChain(), N->getBasePtr(), + DAG.getUNDEF(N->getBasePtr().getValueType()), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->getAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { + // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. + MVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. 
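+ // For example, a <1 x i8> built from an i32 operand only keeps the low
+ // 8 bits, so an explicit TRUNCATE to i8 is emitted.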
+ MVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(), + N->getOperand(0), N->getOperand(1), + LHS, GetScalarizedVector(N->getOperand(3)), + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { + // Figure out if the scalar is the LHS or RHS and return it. + SDValue Arg = N->getOperand(2).getOperand(0); + if (Arg.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); + unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); + return GetScalarizedVector(N->getOperand(Op)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + MVT NVT = N->getValueType(0).getVectorElementType(); + MVT SVT = TLI.getSetCCResultType(LHS.getValueType()); + DebugLoc dl = N->getDebugLoc(); + + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, dl, SVT, LHS, RHS, N->getOperand(2)); + + // VSETCC always returns a sign-extended value, while SETCC may not. The + // SETCC result type may not match the vector element type. Correct these. + if (NVT.bitsLE(SVT)) { + // The SETCC result type is bigger than the vector element type. + // Ensure the SETCC result is sign-extended. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, SVT, Res, + DAG.getValueType(MVT::i1)); + // Truncate to the final type. + return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); + } else { + // The SETCC result type is smaller than the vector element type. + // If the SetCC result is not sign-extended, chop it down to MVT::i1. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Res); + // Sign extend to the final type. + return DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, Res); + } +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Scalarization <1 x ty> -> ty. 
+//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to scalarize this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: + Res = ScalarizeVecOp_BIT_CONVERT(N); break; + + case ISD::CONCAT_VECTORS: + Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; + + case ISD::EXTRACT_VECTOR_ELT: + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; + + case ISD::STORE: + Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Elt); +} + +/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - +/// use a BUILD_VECTOR instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { + SmallVector<SDValue, 8> Ops(N->getNumOperands()); + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) + Ops[i] = GetScalarizedVector(N->getOperand(i)); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0), + &Ops[0], Ops.size()); +} + +/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to +/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the +/// index. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + return GetScalarizedVector(N->getOperand(0)); +} + +/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be +/// scalarized, it must be <1 x ty>. Just store the element. 
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(N->isUnindexed() && "Indexed store of one-element vector?"); + assert(OpNo == 1 && "Do not know how to scalarize this operand!"); + DebugLoc dl = N->getDebugLoc(); + + if (N->isTruncatingStore()) + return DAG.getTruncStore(N->getChain(), dl, + GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->getAlignment()); + + return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), + N->isVolatile(), N->getAlignment()); +} + + +//===----------------------------------------------------------------------===// +// Result Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorResult - This method is called when the specified result of the +/// specified node is found to need vector splitting. At this point, the node +/// may also have invalid operands or may have other results that need +/// legalization, we just know that (at least) one result needs vector +/// splitting. +void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Split node result: "; N->dump(&DAG); cerr << "\n"); + SDValue Lo, Hi; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SplitVectorResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to split the result of this operator!"); + abort(); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + + case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; + case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; + case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; + case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; + case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break; + case ISD::VECTOR_SHUFFLE: + SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::VSETCC: SplitVecRes_VSETCC(N, Lo, Hi); break; + + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FNEG: + case ISD::FABS: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: SplitVecRes_UnaryOp(N, Lo, Hi); break; + + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::FDIV: + case ISD::FPOW: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::UREM: + case ISD::SREM: + case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. 
+ if (Lo.getNode()) + SetSplitVector(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); +} + +void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // We know the result is a vector. The input may be either a vector or a + // scalar value. + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + DebugLoc dl = N->getDebugLoc(); + + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + case SoftenFloat: + case ScalarizeVector: + break; + case ExpandInteger: + case ExpandFloat: + // A scalar to vector conversion, where the scalar needs expansion. + // If the vector is being split in two then we can just convert the + // expanded pieces. + if (LoVT == HiVT) { + GetExpandedOp(InOp, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + return; + } + break; + case SplitVector: + // If the input is a vector that needs to be split, convert each split + // piece of the input now. + GetSplitVector(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + return; + } + + // In the general case, convert the input to an integer and split it by hand. 
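+ // For example, if the result is a v8i8 split into two v4i8 halves, the
+ // input is bit-converted to i64, split into two i32 pieces, and each
+ // piece is bit-converted to v4i8.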
+ MVT LoIntVT = MVT::getIntegerVT(LoVT.getSizeInBits()); + MVT HiIntVT = MVT::getIntegerVT(HiVT.getSizeInBits()); + if (TLI.isBigEndian()) + std::swap(LoIntVT, HiIntVT); + + SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + unsigned LoNumElts = LoVT.getVectorNumElements(); + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); + Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size()); +} + +void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); + DebugLoc dl = N->getDebugLoc(); + unsigned NumSubvectors = N->getNumOperands() / 2; + if (NumSubvectors == 1) { + Lo = N->getOperand(0); + Hi = N->getOperand(1); + return; + } + + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); +} + +void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SDValue DTyOpLo = DAG.getValueType(LoVT); + SDValue DTyOpHi = DAG.getValueType(HiVT); + + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + + // Split the input. + SDValue VLo, VHi; + MVT InVT = N->getOperand(0).getValueType(); + switch (getTypeAction(InVT)) { + default: assert(0 && "Unexpected type action!"); + case Legal: { + assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case SplitVector: + GetSplitVector(N->getOperand(0), VLo, VHi); + break; + case WidenVector: { + // If the result needs to be split and the input needs to be widened, + // the two types must have different lengths. Use the widened result + // and extract from it to do the split. 
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + } + + SDValue STyOpLo = DAG.getValueType(VLo.getValueType()); + SDValue STyOpHi = DAG.getValueType(VHi.getValueType()); + + Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); +} + +void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + MVT IdxVT = Idx.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + // The indices are not guaranteed to be a multiple of the new vector + // size unless the original vector type was split in two. + assert(LoVT == HiVT && "Non power-of-two vectors not supported!"); + + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); + Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, + DAG.getConstant(LoVT.getVectorNumElements(), IdxVT)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx); +} + +void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Elt = N->getOperand(1); + SDValue Idx = N->getOperand(2); + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(Vec, Lo, Hi); + + if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { + unsigned IdxVal = CIdx->getZExtValue(); + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + if (IdxVal < LoNumElts) + Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + Lo.getValueType(), Lo, Elt, Idx); + else + Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getIntPtrConstant(IdxVal - LoNumElts)); + return; + } + + // Spill the vector to the stack. + MVT VecVT = Vec.getValueType(); + MVT EltVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + + // Store the new element. This may be larger than the vector element type, + // so use a truncating store. + SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + unsigned Alignment = + TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForMVT()); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the Hi part from the stack slot. 
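Taken together, the stack-based path above is: spill the whole vector, overwrite one slot, then reload the two halves. A rough standalone model of that idea in plain C++; the four-element vector, the runtime index, and the inserted value are assumptions chosen only for illustration.

#include <array>
#include <cstdio>
#include <cstring>

int main() {
  std::array<int, 4> Vec = {10, 20, 30, 40};  // vector whose halves are being split
  int Elt = 99;                               // element being inserted
  unsigned Idx = 2;                           // runtime (non-constant) index

  // "Spill the vector to the stack."
  int Slot[4];
  std::memcpy(Slot, Vec.data(), sizeof(Slot));

  // "Store the new element" through the computed element pointer.
  Slot[Idx] = Elt;

  // Reload the Lo half, step the pointer, then reload the Hi half.
  int Lo[2], Hi[2];
  std::memcpy(Lo, Slot, sizeof(Lo));
  std::memcpy(Hi, Slot + 2, sizeof(Hi));

  std::printf("Lo = {%d, %d}  Hi = {%d, %d}\n", Lo[0], Lo[1], Hi[0], Hi[1]);
  return 0;
}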
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, + MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); + Hi = DAG.getUNDEF(HiVT); +} + +void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); + MVT LoVT, HiVT; + DebugLoc dl = LD->getDebugLoc(); + GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + const Value *SV = LD->getSrcValue(); + int SVOffset = LD->getSrcValueOffset(); + MVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + MVT LoMemVT, HiMemVT; + GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + + Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, + SV, SVOffset, LoMemVT, isVolatile, Alignment); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + SVOffset += IncrementSize; + Alignment = MinAlign(Alignment, IncrementSize); + Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, + SV, SVOffset, HiMemVT, isVolatile, Alignment); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(LD, 1), Ch); +} + +void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Get the dest types - they may not match the input types, e.g. int_to_fp. + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // Split the input. + MVT InVT = N->getOperand(0).getValueType(); + switch (getTypeAction(InVT)) { + default: assert(0 && "Unexpected type action!"); + case Legal: { + assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case SplitVector: + GetSplitVector(N->getOperand(0), Lo, Hi); + break; + case WidenVector: { + // If the result needs to be split and the input needs to be widened, + // the two types must have different lengths. Use the widened result + // and extract from it to do the split. 
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + } + + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, + SDValue &Lo, SDValue &Hi) { + // The low and high parts of the original input give four input vectors. + SDValue Inputs[4]; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); + GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); + MVT NewVT = Inputs[0].getValueType(); + unsigned NewElts = NewVT.getVectorNumElements(); + assert(NewVT == Inputs[1].getValueType() && + "Non power-of-two vectors not supported!"); + + // If Lo or Hi uses elements from at most two of the four input vectors, then + // express it as a vector shuffle of those two inputs. Otherwise extract the + // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. + SmallVector<int, 16> Ops; + for (unsigned High = 0; High < 2; ++High) { + SDValue &Output = High ? Hi : Lo; + + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. + unsigned FirstMaskIdx = High * NewElts; + bool useBuildVector = false; + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element does not index into any input vector. + Ops.push_back(-1); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) { + // This input vector is already an operand. + break; + } else if (InputUsed[OpNo] == -1U) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } + + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + useBuildVector = true; + break; + } + + // Add the mask index for the new shuffle vector. + Ops.push_back(Idx + OpNo * NewElts); + } + + if (useBuildVector) { + MVT EltVT = NewVT.getVectorElementType(); + SmallVector<SDValue, 16> SVOps; + + // Extract the input elements by hand. + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element is "undef" or indexes off the end of the input. 
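The classification step shared by the shuffle loop above and this BUILD_VECTOR fallback, deciding which of the four half-inputs a mask element touches and remapping its index, can be exercised in isolation. The standalone sketch below uses an assumed 4-element mask and models the bookkeeping only, not the SDNode API.

#include <array>
#include <cstdio>

int main() {
  const unsigned NewElts = 4;              // elements per half-input
  // Mask for one output half; -1 would mean "undef" (values are assumed).
  std::array<int, 4> Mask = {0, 5, 1, 4};

  unsigned InputUsed[2] = {~0u, ~0u};      // which half-inputs feed the shuffle
  bool UseBuildVector = false;
  std::array<int, 4> Ops = {-1, -1, -1, -1};

  for (unsigned i = 0; i < Mask.size() && !UseBuildVector; ++i) {
    int Idx = Mask[i];
    unsigned Input = (unsigned)Idx / NewElts;    // which of the four inputs
    if (Input >= 4)                              // undef or off the end
      continue;

    unsigned OpNo = 0;                           // find or allocate an operand slot
    while (OpNo < 2 && InputUsed[OpNo] != Input && InputUsed[OpNo] != ~0u)
      ++OpNo;
    if (OpNo == 2) { UseBuildVector = true; break; }  // needs >2 inputs: give up
    InputUsed[OpNo] = Input;
    Ops[i] = Idx - (int)(Input * NewElts) + (int)(OpNo * NewElts);
  }

  std::printf("useBuildVector=%d  inputs={%d,%d}  mask={%d,%d,%d,%d}\n",
              UseBuildVector, (int)InputUsed[0], (int)InputUsed[1],
              Ops[0], Ops[1], Ops[2], Ops[3]);
  return 0;
}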
+ SVOps.push_back(DAG.getUNDEF(EltVT)); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Extract the vector element by hand. + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], DAG.getIntPtrConstant(Idx))); + } + + // Construct the Lo/Hi output using a BUILD_VECTOR. + Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); + } else if (InputUsed[0] == -1U) { + // No input vectors were used! The result is undefined. + Output = DAG.getUNDEF(NewVT); + } else { + SDValue Op0 = Inputs[InputUsed[0]]; + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = InputUsed[1] == -1U ? + DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; + // At least one input vector was used. Create a new shuffle vector. + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + } + + Ops.clear(); + } +} + +void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SDValue LL, LH, RL, RH; + GetSplitVector(N->getOperand(0), LL, LH); + GetSplitVector(N->getOperand(1), RL, RH); + + Lo = DAG.getNode(ISD::VSETCC, dl, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(ISD::VSETCC, dl, HiVT, LH, RH, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorOperand - This method is called when the specified operand of the +/// specified node is found to need vector splitting. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need legalization as well as the specified one. +bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Split node operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to split this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: Res = SplitVecOp_UnaryOp(N); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. 
+ MVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + assert(Lo.getValueType() == Hi.getValueType() && + "Returns legal non-power-of-two vector type?"); + MVT InVT = Lo.getValueType(); + + MVT OutVT = MVT::getVectorVT(ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { + // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will + // end up being split all the way down to individual components. Convert the + // split pieces into integers and reassemble. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + JoinIntegers(Lo, Hi)); +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + // We know that the extracted result type is legal. For now, assume the index + // is a constant. + MVT SubVT = N->getValueType(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + if (IdxVal < LoElts) { + assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && + "Extracted subvector crosses vector split!"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + } else { + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, + DAG.getConstant(IdxVal - LoElts, Idx.getValueType())); + } +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + MVT VecVT = Vec.getValueType(); + + if (isa<ConstantSDNode>(Idx)) { + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!"); + + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + + if (IdxVal < LoElts) + return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx); + else + return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, + DAG.getConstant(IdxVal - LoElts, + Idx.getValueType())); + } + + // Store the vector to the stack. + MVT EltVT = VecVT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0); + + // Load back the required element. 
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + return DAG.getLoad(EltVT, dl, Store, StackPtr, SV, 0); +} + +SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed store of vector?"); + assert(OpNo == 1 && "Can only split the stored value"); + DebugLoc dl = N->getDebugLoc(); + + bool isTruncating = N->isTruncatingStore(); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + MVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getAlignment(); + bool isVol = N->isVolatile(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(1), Lo, Hi); + + MVT LoMemVT, HiMemVT; + GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + + if (isTruncating) + Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + LoMemVT, isVol, Alignment); + else + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + isVol, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + + if (isTruncating) + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getSrcValue(), SVOffset+IncrementSize, + HiMemVT, + isVol, MinAlign(Alignment, IncrementSize)); + else + Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, + isVol, MinAlign(Alignment, IncrementSize)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Result Vector Widening +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to widen the result of this operator!"); + abort(); + + case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; + case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; + case ISD::VSETCC: Res = WidenVecRes_VSETCC(N); break; + + case ISD::ADD: + case ISD::AND: + case ISD::BSWAP: + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FPOWI: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::UDIV: + case ISD::UREM: + case ISD::SUB: + case ISD::XOR: Res = WidenVecRes_Binary(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: Res = WidenVecRes_Shift(N); break; + + case 
ISD::ANY_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::ZERO_EXTEND: + case ISD::UINT_TO_FP: Res = WidenVecRes_Convert(N); break; + + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCOS: + case ISD::FNEG: + case ISD::FSIN: + case ISD::FSQRT: Res = WidenVecRes_Unary(N); break; + } + + // If Res is null, the sub-method took care of registering the result. + if (Res.getNode()) + SetWidenedVector(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { + // Binary op widening. + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { + SDValue InOp = N->getOperand(0); + DebugLoc dl = N->getDebugLoc(); + + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + MVT InVT = InOp.getValueType(); + MVT InEltVT = InVT.getVectorElementType(); + MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + + unsigned Opcode = N->getOpcode(); + unsigned InVTNumElts = InVT.getVectorNumElements(); + + if (getTypeAction(InVT) == WidenVector) { + InOp = GetWidenedVector(N->getOperand(0)); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) + return DAG.getNode(Opcode, dl, WidenVT, InOp); + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + return DAG.getNode(Opcode, dl, WidenVT, + DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, + &Ops[0], NumConcat)); + } + + if (InVTNumElts % WidenNumElts == 0) { + // Extract the input and convert the shorten input vector. + return DAG.getNode(Opcode, dl, WidenVT, + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, + InOp, DAG.getIntPtrConstant(0))); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. 
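What the unrolling amounts to: convert the elements that actually exist one at a time, then pad the widened result with undefined values. A small plain-C++ analogue follows; the int-to-float conversion and the element counts are assumed stand-ins for the real opcode and types.

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> In = {1, 2, 3};        // the narrower input vector
  const unsigned WidenNumElts = 4;        // element count of the widened result
  const float Undef = -0.0f;              // stand-in for an UNDEF element

  std::vector<float> Out(WidenNumElts, Undef);
  unsigned MinElts = std::min<unsigned>(In.size(), WidenNumElts);
  for (unsigned i = 0; i < MinElts; ++i)
    Out[i] = static_cast<float>(In[i]);   // one scalar "convert" per element

  for (float V : Out)
    std::printf("%g ", V);
  std::printf("\n");
  return 0;
}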
+ SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = WidenVT.getVectorElementType(); + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i))); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + + MVT ShVT = ShOp.getValueType(); + if (getTypeAction(ShVT) == WidenVector) { + ShOp = GetWidenedVector(ShOp); + ShVT = ShOp.getValueType(); + } + MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(), + WidenVT.getVectorNumElements()); + if (ShVT != ShWidenVT) + ShOp = ModifyToType(ShOp, ShWidenVT); + + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { + // Unary op widening. + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + MVT VT = N->getValueType(0); + MVT WidenVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + break; + case Legal: + break; + case PromoteInteger: + // If the InOp is promoted to the same size, convert it. Otherwise, + // fall out of the switch and widen the promoted input. + InOp = GetPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + break; + case SoftenFloat: + case ExpandInteger: + case ExpandFloat: + case ScalarizeVector: + case SplitVector: + break; + case WidenVector: + // If the InOp is widened to the same size, convert it. Otherwise, fall + // out of the switch and widen the widened input. + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + // The input widens to the same size. Convert to the widen value. + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + break; + } + + unsigned WidenSize = WidenVT.getSizeInBits(); + unsigned InSize = InVT.getSizeInBits(); + if (WidenSize % InSize == 0) { + // Determine new input vector type. The new input vector type will use + // the same element type (if its a vector) or use the input type as a + // vector. It is the same size as the type to widen to. + MVT NewInVT; + unsigned NewNumElts = WidenSize / InSize; + if (InVT.isVector()) { + MVT InEltVT = InVT.getVectorElementType(); + NewInVT= MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits()); + } else { + NewInVT = MVT::getVectorVT(InVT, NewNumElts); + } + + if (TLI.isTypeLegal(NewInVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. 
+ SmallVector<SDValue, 16> Ops(NewNumElts); + SDValue UndefVal = DAG.getUNDEF(InVT); + Ops[0] = InOp; + for (unsigned i = 1; i < NewNumElts; ++i) + Ops[i] = UndefVal; + + SDValue NewVec; + if (InVT.isVector()) + NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NewInVT, &Ops[0], NewNumElts); + else + NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, + NewInVT, &Ops[0], NewNumElts); + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec); + } + } + + // This should occur rarely. Lower the bit-convert to a store/load + // from the stack. Create the stack frame object. Make sure it is aligned + // for both the source and destination types. + SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); + + // Result is a load from the stack slot. + return DAG.getLoad(WidenVT, dl, Store, FIPtr, SV, 0); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + // Build a vector with undefined for the new nodes. + MVT VT = N->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + + MVT WidenVT = TLI.getTypeToTransformTo(VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end()); + NewOps.reserve(WidenNumElts); + for (unsigned i = NumElts; i < WidenNumElts; ++i) + NewOps.push_back(DAG.getUNDEF(EltVT)); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size()); +} + +SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { + MVT InVT = N->getOperand(0).getValueType(); + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + unsigned NumOperands = N->getNumOperands(); + + bool InputWidened = false; // Indicates we need to widen the input. + if (getTypeAction(InVT) != WidenVector) { + if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) { + // Add undef vectors to widen to correct length. + unsigned NumConcat = WidenVT.getVectorNumElements() / + InVT.getVectorNumElements(); + SDValue UndefVal = DAG.getUNDEF(InVT); + SmallVector<SDValue, 16> Ops(NumConcat); + for (unsigned i=0; i < NumOperands; ++i) + Ops[i] = N->getOperand(i); + for (unsigned i = NumOperands; i != NumConcat; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat); + } + } else { + InputWidened = true; + if (WidenVT == TLI.getTypeToTransformTo(InVT)) { + // The inputs and the result are widen to the same value. + unsigned i; + for (i=1; i < NumOperands; ++i) + if (N->getOperand(i).getOpcode() != ISD::UNDEF) + break; + + if (i > NumOperands) + // Everything but the first operand is an UNDEF so just return the + // widened first operand. + return GetWidenedVector(N->getOperand(0)); + + if (NumOperands == 2) { + // Replace concat of two operands with a shuffle. + SmallVector<int, 16> MaskOps(WidenNumElts); + for (unsigned i=0; i < WidenNumElts/2; ++i) { + MaskOps[i] = i; + MaskOps[i+WidenNumElts/2] = i+WidenNumElts; + } + return DAG.getVectorShuffle(WidenVT, dl, + GetWidenedVector(N->getOperand(0)), + GetWidenedVector(N->getOperand(1)), + &MaskOps[0]); + } + } + } + + // Fall back to use extracts and build vector. 
+ MVT EltVT = WidenVT.getVectorElementType(); + unsigned NumInElts = InVT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(WidenNumElts); + unsigned Idx = 0; + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (InputWidened) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(j)); + } + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue InOp = N->getOperand(0); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + MVT InVT = InOp.getValueType(); + MVT InEltVT = InVT.getVectorElementType(); + MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + + SDValue DTyOp = DAG.getValueType(WidenVT); + SDValue STyOp = DAG.getValueType(InWidenVT); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + + unsigned InVTNumElts = InVT.getVectorNumElements(); + if (getTypeAction(InVT) == WidenVector) { + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) { + Ops[i] = UndefVal; + } + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (InVTNumElts % WidenNumElts == 0) { + // Extract the input and convert the shorten input vector. + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, + DAG.getIntPtrConstant(0)); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. 
+ SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = WidenVT.getVectorElementType(); + DTyOp = DAG.getValueType(EltVT); + STyOp = DAG.getValueType(InEltVT); + + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) { + SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i)); + Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + MVT VT = N->getValueType(0); + MVT WidenVT = TLI.getTypeToTransformTo(VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + + MVT InVT = InOp.getValueType(); + + ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); + if (CIdx) { + unsigned IdxVal = CIdx->getZExtValue(); + // Check if we can just return the input vector after widening. + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); + } + + // We could try widening the input to the right length but for now, extract + // the original elements, fill the rest with undefs and build a vector. + SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = VT.getVectorElementType(); + MVT IdxVT = Idx.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned i; + if (CIdx) { + unsigned IdxVal = CIdx->getZExtValue(); + for (i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(IdxVal+i, IdxVT)); + } else { + Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx); + for (i=1; i < NumElts; ++i) { + SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + DAG.getConstant(i, IdxVT)); + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx); + } + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(), + InOp.getValueType(), InOp, + N->getOperand(1), N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + MVT LdVT = LD->getMemoryVT(); + DebugLoc dl = N->getDebugLoc(); + assert(LdVT.isVector() && WidenVT.isVector()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + const Value *SV = LD->getSrcValue(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SDValue Result; + SmallVector<SDValue, 16> LdChain; // Chain for the series of load + if (ExtType != ISD::NON_EXTLOAD) { + // For extension 
loads, we can not play the tricks of chopping legal + // vector types and bit cast it to the right type. Instead, we unroll + // the load and build a vector. + MVT EltVT = WidenVT.getVectorElementType(); + MVT LdEltVT = LdVT.getVectorElementType(); + unsigned NumElts = LdVT.getVectorNumElements(); + + // Load each element and widen + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(WidenNumElts); + unsigned Increment = LdEltVT.getSizeInBits() / 8; + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, + LdEltVT, isVolatile, Align); + LdChain.push_back(Ops[0].getValue(1)); + unsigned i = 0, Offset = Increment; + for (i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, DAG.getIntPtrConstant(Offset)); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, + SVOffset + Offset, LdEltVT, isVolatile, Align); + LdChain.push_back(Ops[i].getValue(1)); + } + + // Fill the rest with undefs + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i != WidenNumElts; ++i) + Ops[i] = UndefVal; + + Result = DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); + } else { + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + unsigned int LdWidth = LdVT.getSizeInBits(); + Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset, + Align, isVolatile, LdWidth, WidenVT, dl); + } + + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0], + LdChain.size()); + + // Modified the chain - switch anything that used the old chain to use + // the new one. 
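Before the chain is rewired below, the extending-load path above has issued one narrow load per element at an offset that grows by the element's byte size. A standalone sketch of that offset arithmetic; the element type, count, and memory contents are assumptions for illustration.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Memory image of a <4 x i16> that is loaded and sign-extended to i32 lanes.
  const int16_t Memory[4] = {-1, 2, -3, 4};
  const unsigned NumElts = 4;
  const unsigned Increment = sizeof(int16_t);    // element size in bytes

  int32_t Widened[4];
  unsigned Offset = 0;
  for (unsigned i = 0; i < NumElts; ++i, Offset += Increment) {
    int16_t Elt;
    std::memcpy(&Elt, reinterpret_cast<const char *>(Memory) + Offset,
                sizeof(Elt));                    // one narrow load at base + Offset
    Widened[i] = Elt;                            // extend into the wide element
  }

  for (int32_t V : Widened)
    std::printf("%d ", V);
  std::printf("\n");
  return 0;
}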
+  ReplaceValueWith(SDValue(N, 1), NewChain);
+
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+                     WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue Cond1 = N->getOperand(0);
+  MVT CondVT = Cond1.getValueType();
+  if (CondVT.isVector()) {
+    MVT CondEltVT = CondVT.getVectorElementType();
+    MVT CondWidenVT = MVT::getVectorVT(CondEltVT, WidenNumElts);
+    if (getTypeAction(CondVT) == WidenVector)
+      Cond1 = GetWidenedVector(Cond1);
+
+    if (Cond1.getValueType() != CondWidenVT)
+      Cond1 = ModifyToType(Cond1, CondWidenVT);
+  }
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+  assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+  SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+                     InOp1.getValueType(), N->getOperand(0),
+                     N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  MVT WidenVT = TLI.getTypeToTransformTo(VT);
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Adjust mask based on new input vector length.
+  SmallVector<int, 16> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int Idx = N->getMaskElt(i);
+    if (Idx < (int)NumElts)
+      NewMask.push_back(Idx);
+    else
+      NewMask.push_back(Idx - NumElts + WidenNumElts);
+  }
+  for (unsigned i = NumElts; i != WidenNumElts; ++i)
+    NewMask.push_back(-1);
+  return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = N->getOperand(0);
+  MVT InVT = InOp1.getValueType();
+  assert(InVT.isVector() && "can not widen non vector type");
+  MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts);
+  InOp1 = GetWidenedVector(InOp1);
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately. If not,
+  // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT && + InOp2.getValueType() == WidenInVT && + "Input not widened to expected type!"); + return DAG.getNode(ISD::VSETCC, N->getDebugLoc(), + WidenVT, InOp1, InOp2, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Widen Vector Operand +//===----------------------------------------------------------------------===// +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Widen node operand " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "WidenVectorOperand op #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to widen this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = WidenVecOp_STORE(N); break; + + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: Res = WidenVecOp_Convert(N); break; + } + + // If Res is null, the sub-method took care of registering the result. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { + // Since the result is legal and the input is illegal, it is unlikely + // that we can fix the input to a legal type so unroll the convert + // into some scalar code and create a nasty build vector. + MVT VT = N->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + MVT InVT = InOp.getValueType(); + MVT InEltVT = InVT.getVectorElementType(); + + unsigned Opcode = N->getOpcode(); + SmallVector<SDValue, 16> Ops(NumElts); + for (unsigned i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i))); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { + MVT VT = N->getValueType(0); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InWidenVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Check if we can convert between two legal vector types and extract. + unsigned InWidenSize = InWidenVT.getSizeInBits(); + unsigned Size = VT.getSizeInBits(); + if (InWidenSize % Size == 0 && !VT.isVector()) { + unsigned NewNumElts = InWidenSize / Size; + MVT NewVT = MVT::getVectorVT(VT, NewNumElts); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getIntPtrConstant(0)); + } + } + + // Lower the bit-convert to a store/load from the stack. Create the stack + // frame object. Make sure it is aligned for both the source and destination + // types. 
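That store/load fallback is the DAG-level version of type-punning through memory. A minimal plain-C++ picture of the same round trip follows; the concrete types and values are chosen only as an example.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Reinterpret a 2 x 32-bit "vector" as a single 64-bit scalar by going
  // through a memory slot, the same round trip the stack fallback performs.
  uint32_t Vec[2] = {0xdeadbeefu, 0x01234567u};

  alignas(8) unsigned char Slot[8];             // stack temporary, aligned for both types
  std::memcpy(Slot, Vec, sizeof(Slot));         // store with the source type
  uint64_t Scalar;
  std::memcpy(&Scalar, Slot, sizeof(Scalar));   // load with the destination type

  std::printf("0x%016llx\n", (unsigned long long)Scalar);
  return 0;
}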
+ SDValue FIPtr = DAG.CreateStackTemporary(InWidenVT, VT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, dl, Store, FIPtr, SV, 0); +} + +SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { + // If the input vector is not legal, it is likely that we will not find a + // legal vector of the same size. Replace the concatenate vector with a + // nasty build vector. + MVT VT = N->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + + MVT InVT = N->getOperand(0).getValueType(); + unsigned NumInElts = InVT.getVectorNumElements(); + + unsigned Idx = 0; + unsigned NumOperands = N->getNumOperands(); + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(j)); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT EltVT = InOp.getValueType().getVectorElementType(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + EltVT, InOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { + // We have to widen the value but we want only to store the original + // vector type. + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + const Value *SV = ST->getSrcValue(); + int SVOffset = ST->getSrcValueOffset(); + unsigned Align = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + DebugLoc dl = N->getDebugLoc(); + + MVT StVT = ST->getMemoryVT(); + MVT ValVT = ValOp.getValueType(); + // It must be true that we the widen vector type is bigger than where + // we need to store. + assert(StVT.isVector() && ValOp.getValueType().isVector()); + assert(StVT.bitsLT(ValOp.getValueType())); + + SmallVector<SDValue, 16> StChain; + if (ST->isTruncatingStore()) { + // For truncating stores, we can not play the tricks of chopping legal + // vector types and bit cast it to the right type. Instead, we unroll + // the store. 
+    MVT StEltVT = StVT.getVectorElementType();
+    MVT ValEltVT = ValVT.getVectorElementType();
+    unsigned Increment = ValEltVT.getSizeInBits() / 8;
+    unsigned NumElts = StVT.getVectorNumElements();
+    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                              DAG.getIntPtrConstant(0));
+    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+                                        SVOffset, StEltVT,
+                                        isVolatile, Align));
+    unsigned Offset = Increment;
+    for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+      SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                       BasePtr, DAG.getIntPtrConstant(Offset));
+      SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                                DAG.getIntPtrConstant(i));
+      StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+                                          SVOffset + Offset, StEltVT,
+                                          isVolatile, MinAlign(Align, Offset)));
+    }
+  }
+  else {
+    assert(StVT.getVectorElementType() == ValVT.getVectorElementType());
+    // Store value
+    GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset,
+                         Align, isVolatile, ValOp, StVT.getSizeInBits(), dl);
+  }
+  if (StChain.size() == 1)
+    return StChain[0];
+  else
+    return DAG.getNode(ISD::TokenFactor, dl,
+                       MVT::Other, &StChain[0], StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+
+// Utility function to find a vector type and its associated element
+// type from a preferred width; the vector type must be the same size
+// as VecVT.
+// TLI:   Target lowering used to determine legal types.
+// Width: Preferred width to store.
+// VecVT: Vector value type whose size we must match.
+// Returns NewVecVT and NewEltVT - the vector type and its associated
+// element type.
+static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width,
+                                  MVT VecVT,
+                                  MVT& NewEltVT, MVT& NewVecVT) {
+  unsigned EltWidth = Width + 1;
+  if (TLI.isTypeLegal(VecVT)) {
+    // We start with the preferred width, rounded down to a power of 2, and
+    // look for a legal vector type of that width. If there is none, we reduce
+    // the width by another factor of 2. Since the incoming type is legal,
+    // this process terminates: a vector of the smallest loadable type should
+    // always be legal.
+    do {
+      assert(EltWidth > 0);
+      EltWidth = 1 << Log2_32(EltWidth - 1);
+      NewEltVT = MVT::getIntegerVT(EltWidth);
+      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+      NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+    } while (!TLI.isTypeLegal(NewVecVT) ||
+             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+  } else {
+    // The incoming vector type is illegal and is the result of widening
+    // a vector to a power of 2. In this case, we will use the preferred
+    // width as long as it is a multiple of the incoming vector length.
+    // The legalization process will eventually make this into a legal type
+    // and remove the illegal bit converts (which would turn into stack
+    // converts if they were allowed to exist).
+    do {
+      assert(EltWidth > 0);
+      EltWidth = 1 << Log2_32(EltWidth - 1);
+      NewEltVT = MVT::getIntegerVT(EltWidth);
+      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+      NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+    } while (!TLI.isTypeLegal(NewEltVT) ||
+             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+  }
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+                                              SDValue Chain,
+                                              SDValue BasePtr,
+                                              const Value *SV,
+                                              int SVOffset,
+                                              unsigned Alignment,
+                                              bool isVolatile,
+                                              unsigned LdWidth,
+                                              MVT ResType,
+                                              DebugLoc dl) {
+  // The strategy assumes that we can efficiently load power-of-two widths.
+  // The routine chops the load into the largest power-of-2 pieces, inserts
+  // each piece into a legal vector, and then bit-converts the result into
+  // the vector type we want. This avoids unnecessary stack converts.
+
+  // TODO: If the LdWidth is legal, the alignment is the same as the LdWidth,
+  // and the load is nonvolatile, we can use a wider load for the value.
+
+  // Find a vector type that we can load from.
+  MVT NewEltVT, NewVecVT;
+  unsigned NewEltVTWidth;
+  FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+  NewEltVTWidth = NewEltVT.getSizeInBits();
+
+  SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset,
+                             isVolatile, Alignment);
+  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+  LdChain.push_back(LdOp.getValue(1));
+
+  // Check if we can load the element with one instruction.
+  if (LdWidth == NewEltVTWidth) {
+    return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+  }
+
+  unsigned Idx = 1;
+  LdWidth -= NewEltVTWidth;
+  unsigned Offset = 0;
+
+  while (LdWidth > 0) {
+    unsigned Increment = NewEltVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+
+    if (LdWidth < NewEltVTWidth) {
+      // The type we are currently using is too large; switch to a smaller
+      // power of 2.
+      unsigned oNewEltVTWidth = NewEltVTWidth;
+      FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+      NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust the vector index and the vector value based on the new
+      // load type.
+      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+    }
+
+    SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV,
+                               SVOffset+Offset, isVolatile,
+                               MinAlign(Alignment, Offset));
+    LdChain.push_back(LdOp.getValue(1));
+    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp,
+                        DAG.getIntPtrConstant(Idx++));
+
+    LdWidth -= NewEltVTWidth;
+  }
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+}
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+                                            SDValue Chain,
+                                            SDValue BasePtr,
+                                            const Value *SV,
+                                            int SVOffset,
+                                            unsigned Alignment,
+                                            bool isVolatile,
+                                            SDValue ValOp,
+                                            unsigned StWidth,
+                                            DebugLoc dl) {
+  // Break the store into a series of power-of-2 width stores. For any width,
+  // we convert the vector to a vector of the element size that we want to
+  // store. This avoids requiring a stack convert.
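Both GenWidenVectorLoads above and this store routine decompose an odd width into power-of-two pieces, each time taking roughly the largest piece that still fits and advancing the pointer by that many bytes. The arithmetic can be checked in isolation with the standalone sketch below; the 96-bit starting width is an arbitrary assumed example, and the legality constraint on the chosen type is ignored.

#include <cstdio>

// Largest power of two that is <= Width (what 1 << Log2_32(Width) computes).
static unsigned LargestPow2(unsigned Width) {
  unsigned P = 1;
  while (P * 2 <= Width)
    P *= 2;
  return P;
}

int main() {
  unsigned LdWidth = 96;   // bits left to access, e.g. a widened <3 x i32>
  unsigned Offset = 0;     // byte offset of the next partial access

  while (LdWidth > 0) {
    unsigned Piece = LargestPow2(LdWidth);      // 64, then 32
    std::printf("access %u bits at byte offset %u\n", Piece, Offset);
    Offset += Piece / 8;
    LdWidth -= Piece;
  }
  return 0;
}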
+ + // Find a width of the element type we can store with + MVT WidenVT = ValOp.getValueType(); + MVT NewEltVT, NewVecVT; + + FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + unsigned NewEltVTWidth = NewEltVT.getSizeInBits(); + + SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, + DAG.getIntPtrConstant(0)); + SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset, + isVolatile, Alignment); + StChain.push_back(StOp); + + // Check if we are done + if (StWidth == NewEltVTWidth) { + return; + } + + unsigned Idx = 1; + StWidth -= NewEltVTWidth; + unsigned Offset = 0; + + while (StWidth > 0) { + unsigned Increment = NewEltVTWidth / 8; + Offset += Increment; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getIntPtrConstant(Increment)); + + if (StWidth < NewEltVTWidth) { + // Our current type we are using is too large, use a smaller size by + // using a smaller power of 2 + unsigned oNewEltVTWidth = NewEltVTWidth; + FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + NewEltVTWidth = NewEltVT.getSizeInBits(); + // Readjust position and vector position based on new load type + Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); + VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); + } + + EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, + DAG.getIntPtrConstant(Idx++)); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, + SVOffset + Offset, isVolatile, + MinAlign(Alignment, Offset))); + StWidth -= NewEltVTWidth; + } +} + +/// Modifies a vector input (widen or narrows) to a vector of NVT. The +/// input vector must have the same element type as NVT. +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) { + // Note that InOp might have been widened so it might already have + // the right width or it might need be narrowed. + MVT InVT = InOp.getValueType(); + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + DebugLoc dl = InOp.getDebugLoc(); + + // Check if InOp already has the right width. + if (InVT == NVT) + return InOp; + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { + unsigned NumConcat = WidenNumElts / InNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue UndefVal = DAG.getUNDEF(InVT); + Ops[0] = InOp; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat); + } + + if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, + DAG.getIntPtrConstant(0)); + + // Fall back to extract and build. 
+ SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = NVT.getVectorElementType(); + unsigned MinNumElts = std::min(WidenNumElts, InNumElts); + unsigned Idx; + for (Idx = 0; Idx < MinNumElts; ++Idx) + Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(Idx)); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for ( ; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts); +} diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile new file mode 100644 index 000000000000..185222ade98e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/Makefile @@ -0,0 +1,15 @@ +##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMSelectionDAG +PARALLEL_DIRS = +BUILD_ARCHIVE = 1 +DONT_BUILD_RELINKED = 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp new file mode 100644 index 000000000000..af73b28fae93 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -0,0 +1,635 @@ +//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a fast scheduler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical copies"); + +static RegisterScheduler + fastDAGScheduler("fast", "Fast suboptimal list scheduling", + createFastDAGScheduler); + +namespace { + /// FastPriorityQueue - A degenerate priority queue that considers + /// all nodes to have the same priority. + /// + struct VISIBILITY_HIDDEN FastPriorityQueue { + SmallVector<SUnit *, 16> Queue; + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + Queue.push_back(U); + } + + SUnit *pop() { + if (empty()) return NULL; + SUnit *V = Queue.back(); + Queue.pop_back(); + return V; + } + }; + +//===----------------------------------------------------------------------===// +/// ScheduleDAGFast - The actual "fast" list scheduler implementation. +/// +class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + FastPriorityQueue AvailableQueue; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". 
These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::vector<SUnit*> LiveRegDefs; + std::vector<unsigned> LiveRegCycles; + +public: + ScheduleDAGFast(MachineFunction &mf) + : ScheduleDAGSDNodes(mf) {} + + void Schedule(); + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + void AddPred(SUnit *SU, const SDep &D) { + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. + void RemovePred(SUnit *SU, const SDep &D) { + SU->removePred(D); + } + +private: + void ReleasePred(SUnit *SU, SDep *PredEdge); + void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ScheduleNodeBottomUp(SUnit*, unsigned); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector<SUnit*, 2>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + void ListScheduleBottomUp(); + + /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool ForceUnitLatencies() const { return true; } +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGFast::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + NumLiveRegs = 0; + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegCycles.resize(TRI->getNumRegs(), 0); + + // Build the scheduling graph. + BuildSchedGraph(); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + // Execute the actual scheduling loop. + ListScheduleBottomUp(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + --PredSU->NumSuccsLeft; + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + AvailableQueue.push(PredSU); + } +} + +void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + if (!LiveRegDefs[I->getReg()]) { + ++NumLiveRegs; + LiveRegDefs[I->getReg()] = I->getSUnit(); + LiveRegCycles[I->getReg()] = CurCycle; + } + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. 
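/// (Release example: if a predecessor started with NumSuccsLeft == 3 and this
/// is the last of its three successors to be scheduled, ReleasePred drops the
/// count to 0 and pushes the predecessor onto the AvailableQueue.)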
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleasePredecessors(SU, CurCycle); + + // Release all the implicit physical register defs that are live. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == SU && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + } + + SU->isScheduled = true; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->getNode()->getFlaggedNode()) + return NULL; + + SDNode *N = SU->getNode(); + if (!N) + return NULL; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDValue &Op = N->getOperand(i); + MVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (VT == MVT::Flag) + return NULL; + } + + if (TryUnfold) { + SmallVector<SDNode*, 2> NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return NULL; + + DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + SUnit *NewSU = NewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. 
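    // Unfolding example (illustrative, x86-style): a load-folded node such as
    // "t = ADD32rm r, [addr]" is split by unfoldMemoryOperand into
    // NewNodes[0] = "ld = MOV32rm [addr]" and NewNodes[1] = "t = ADD32rr r, ld",
    // letting the load and the arithmetic be scheduled as separate SUnits.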
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = NewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + } + + SDep ChainPred; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPred = *I; + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + if (ChainPred.getSUnit()) { + RemovePred(SU, ChainPred); + if (isNewLoad) + AddPred(LoadSU, ChainPred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) { + AddPred(LoadSU, Pred); + } + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + if (isNewLoad) { + AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); + } + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + NewSU = Clone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector<SUnit*, 2> &Copies) { + SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL)); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL)); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
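  // Net effect (illustrative): the value leaves the contested physical
  // register through CopyFromSU (SrcRC -> DestRC) and is copied back by
  // CopyToSU (DestRC -> SrcRC); every already-scheduled successor of SU is
  // rewired below to read from CopyToSU instead of SU.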
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + RemovePred(DelDeps[i].first, DelDeps[i].second); + } + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector<unsigned, 4> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + unsigned Reg = I->getReg(); + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) { + if (RegAdded.insert(Reg)) + LRegs.push_back(Reg); + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); + *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) { + if (RegAdded.insert(*Alias)) + LRegs.push_back(*Alias); + } + } + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (!Node->isMachineOpcode()) + continue; + const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); + if (!TID.ImplicitDefs) + continue; + for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { + if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) { + if (RegAdded.insert(*Reg)) + LRegs.push_back(*Reg); + } + for (const unsigned *Alias = TRI->getAliasSet(*Reg); + *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) + LRegs.push_back(*Alias); + } + } + } + return !LRegs.empty(); +} + + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGFast::ListScheduleBottomUp() { + unsigned CurCycle = 0; + + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU, CurCycle); + + // Add root to Available queue. 
+ if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue.push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + SmallVector<SUnit*, 4> NotReady; + DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue.empty()) { + bool Delayed = false; + LRegsMap.clear(); + SUnit *CurSU = AvailableQueue.pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + Delayed = true; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + NotReady.push_back(CurSU); + CurSU = AvailableQueue.pop(); + } + + // All candidates are delayed due to live physical reg dependencies. + // Try code duplication or inserting cross class copies + // to resolve it. + if (Delayed && !CurSU) { + if (!CurSU) { + // Try duplicating the nodes that produces these + // "expensive to copy" values to break the dependency. In case even + // that doesn't work, insert cross class copies. + SUnit *TrySU = NotReady[0]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + assert(LRegs.size() == 1 && "Can't handle this yet!"); + unsigned Reg = LRegs[0]; + SUnit *LRDef = LiveRegDefs[Reg]; + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible copy + // the value directly. Do not try duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; + if (!NewDef) { + // Issue copies, these can be expensive cross register class copies. + SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DOUT << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"; + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DOUT << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"; + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + if (!CurSU) { + assert(false && "Unable to resolve live physical register dependencies!"); + abort(); + } + } + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + NotReady[i]->isPending = false; + // May no longer be available due to backtracking. + if (NotReady[i]->isAvailable) + AvailableQueue.push(NotReady[i]); + } + NotReady.clear(); + + if (CurSU) + ScheduleNodeBottomUp(CurSU, CurCycle); + ++CurCycle; + } + + // Reverse the order if it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + + +#ifndef NDEBUG + // Verify that all SUnits were scheduled. 
+ bool AnyNotSched = false; + unsigned DeadNodes = 0; + unsigned Noops = 0; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (!SUnits[i].isScheduled) { + if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { + ++DeadNodes; + continue; + } + if (!AnyNotSched) + cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(this); + cerr << "has not been scheduled!\n"; + AnyNotSched = true; + } + if (SUnits[i].NumSuccsLeft != 0) { + if (!AnyNotSched) + cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(this); + cerr << "has successors left!\n"; + AnyNotSched = true; + } + } + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(!AnyNotSched); + assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && + "The number of nodes scheduled doesn't match the expected number!"); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGFast(*IS->MF); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp new file mode 100644 index 000000000000..c4325349990d --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -0,0 +1,268 @@ +//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/Statistic.h" +#include <climits> +using namespace llvm; + +STATISTIC(NumNoops , "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); + +static RegisterScheduler + tdListDAGScheduler("list-td", "Top-down list scheduler", + createTDListDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGList - The actual list scheduler implementation. This supports +/// top-down scheduling. +/// +class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. 
+ /// + SchedulingPriorityQueue *AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands become available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + +public: + ScheduleDAGList(MachineFunction &mf, + SchedulingPriorityQueue *availqueue, + ScheduleHazardRecognizer *HR) + : ScheduleDAGSDNodes(mf), + AvailableQueue(availqueue), HazardRec(HR) { + } + + ~ScheduleDAGList() { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule(); + +private: + void ReleaseSucc(SUnit *SU, const SDep &D); + void ReleaseSuccessors(SUnit *SU); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); +}; +} // end anonymous namespace + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGList::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + // Build the scheduling graph. + BuildSchedGraph(); + + AvailableQueue->initNodes(SUnits); + + ListScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { + SUnit *SuccSU = D.getSUnit(); + --SuccSU->NumPredsLeft; + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + PendingQueue.push_back(SuccSU); +} + +void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, *I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. 
+ if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = 0; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. + if (FoundSUnit) { + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. + DOUT << "*** Advancing cycle, no work to do\n"; + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DOUT << "*** Emitting noop\n"; + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createTDListDAGScheduler - This creates a top-down list scheduler with a +/// new hazard recognizer. This scheduler takes ownership of the hazard +/// recognizer and deletes it when done. 
+ScheduleDAGSDNodes * +llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGList(*IS->MF, + new LatencyPriorityQueue(), + IS->CreateTargetHazardRecognizer()); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 000000000000..c97e2a8c86bf --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,1533 @@ +//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms. The basic approach uses a priority +// queue of available nodes to schedule. One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <climits> +using namespace llvm; + +STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical register copies"); + +static RegisterScheduler + burrListDAGScheduler("list-burr", + "Bottom-up register reduction list scheduling", + createBURRListDAGScheduler); +static RegisterScheduler + tdrListrDAGScheduler("list-tdrr", + "Top-down register reduction list scheduling", + createTDRRListDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGRRList - The actual register reduction list scheduler +/// implementation. This supports both top-down and bottom-up scheduling. +/// +class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { +private: + /// isBottomUp - This is true if the scheduling problem is bottom-up, false if + /// it is top-down. + bool isBottomUp; + + /// AvailableQueue - The priority queue to use for the available SUnits. + SchedulingPriorityQueue *AvailableQueue; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::vector<SUnit*> LiveRegDefs; + std::vector<unsigned> LiveRegCycles; + + /// Topo - A topological ordering for SUnits which permits fast IsReachable + /// and similar queries. 
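  /// (For example, WillCreateCycle below consults this ordering before the
  /// artificial edges used to resolve physical-register conflicts are added,
  /// so that an edge is never added when it would make the DAG cyclic.)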
+ ScheduleDAGTopologicalSort Topo; + +public: + ScheduleDAGRRList(MachineFunction &mf, + bool isbottomup, + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), + AvailableQueue(availqueue), Topo(SUnits) { + } + + ~ScheduleDAGRRList() { + delete AvailableQueue; + } + + void Schedule(); + + /// IsReachable - Checks if SU is reachable from TargetSU. + bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { + return Topo.IsReachable(SU, TargetSU); + } + + /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will + /// create a cycle. + bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { + return Topo.WillCreateCycle(SU, TargetSU); + } + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + /// Updates the topological ordering if required. + void AddPred(SUnit *SU, const SDep &D) { + Topo.AddPred(SU, D.getSUnit()); + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. + /// Updates the topological ordering if required. + void RemovePred(SUnit *SU, const SDep &D) { + Topo.RemovePred(SU, D.getSUnit()); + SU->removePred(D); + } + +private: + void ReleasePred(SUnit *SU, const SDep *PredEdge); + void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); + void ReleaseSuccessors(SUnit *SU); + void CapturePred(SDep *PredEdge); + void ScheduleNodeBottomUp(SUnit*, unsigned); + void ScheduleNodeTopDown(SUnit*, unsigned); + void UnscheduleNodeBottomUp(SUnit*); + void BacktrackBottomUp(SUnit*, unsigned, unsigned&); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector<SUnit*, 2>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + void ListScheduleTopDown(); + void ListScheduleBottomUp(); + + + /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. + /// Updates the topological ordering if required. + SUnit *CreateNewSUnit(SDNode *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = NewSUnit(N); + // Update the topological ordering. + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// CreateClone - Creates a new SUnit from an existing one. + /// Updates the topological ordering if required. + SUnit *CreateClone(SUnit *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = Clone(N); + // Update the topological ordering. + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// ForceUnitLatencies - Return true, since register-pressure-reducing + /// scheduling doesn't need actual latency information. + bool ForceUnitLatencies() const { return true; } +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGRRList::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + NumLiveRegs = 0; + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegCycles.resize(TRI->getNumRegs(), 0); + + // Build the scheduling graph. + BuildSchedGraph(); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + Topo.InitDAGTopologicalSorting(); + + AvailableQueue->initNodes(SUnits); + + // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 
+ if (isBottomUp) + ListScheduleBottomUp(); + else + ListScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + --PredSU->NumSuccsLeft; + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + AvailableQueue->push(PredSU); + } +} + +void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + if (!LiveRegDefs[I->getReg()]) { + ++NumLiveRegs; + LiveRegDefs[I->getReg()] = I->getSUnit(); + LiveRegCycles[I->getReg()] = CurCycle; + } + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleasePredecessors(SU, CurCycle); + + // Release all the implicit physical register defs that are live. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == SU && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + } + + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// CapturePred - This does the opposite of ReleasePred. Since SU is being +/// unscheduled, incrcease the succ left count of its predecessors. Remove +/// them from AvailableQueue if necessary. +void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + if (PredSU->isAvailable) { + PredSU->isAvailable = false; + if (!PredSU->isPending) + AvailableQueue->remove(PredSU); + } + + ++PredSU->NumSuccsLeft; +} + +/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and +/// its predecessor states to reflect the change. 
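/// (This is the inverse of ScheduleNodeBottomUp: CapturePred re-increments
/// NumSuccsLeft on each predecessor, the live-register bookkeeping for
/// assigned physical-register dependencies is restored, and SU itself is
/// pushed back onto the AvailableQueue.)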
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { + DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; + DEBUG(SU->dump(this)); + + AvailableQueue->UnscheduledNode(SU); + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + CapturePred(&*I); + if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == I->getSUnit() && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (!LiveRegDefs[I->getReg()]) { + LiveRegDefs[I->getReg()] = SU; + ++NumLiveRegs; + } + if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) + LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); + } + } + + SU->setHeightDirty(); + SU->isScheduled = false; + SU->isAvailable = true; + AvailableQueue->push(SU); +} + +/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in +/// BTCycle in order to schedule a specific node. +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, + unsigned &CurCycle) { + SUnit *OldSU = NULL; + while (CurCycle > BtCycle) { + OldSU = Sequence.back(); + Sequence.pop_back(); + if (SU->isSucc(OldSU)) + // Don't try to remove SU from AvailableQueue. + SU->isAvailable = false; + UnscheduleNodeBottomUp(OldSU); + --CurCycle; + } + + assert(!SU->isSucc(OldSU) && "Something is wrong!"); + + ++NumBacktracks; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->getNode()->getFlaggedNode()) + return NULL; + + SDNode *N = SU->getNode(); + if (!N) + return NULL; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDValue &Op = N->getOperand(i); + MVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (VT == MVT::Flag) + return NULL; + } + + if (TryUnfold) { + SmallVector<SDNode*, 2> NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return NULL; + + DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. 
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + ComputeLatency(LoadSU); + } + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + ComputeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector<SDep, 4> ChainPreds; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPreds.push_back(*I); + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + // Now assign edges to the newly-created nodes. + for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { + const SDep &Pred = ChainPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + NewSU = CreateClone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(NewSU); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector<SUnit*, 2> &Copies) { + SUnit *CopyFromSU = CreateNewSUnit(NULL); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = CreateNewSUnit(NULL); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(CopyFromSU); + AvailableQueue->addNode(CopyToSU); + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { + if (RegAdded.insert(Reg)) { + LRegs.push_back(Reg); + Added = true; + } + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) { + LRegs.push_back(*Alias); + Added = true; + } + } + return Added; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. 
+/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector<unsigned, 4> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = (Flags & 0xffff) >> 3; + + ++i; // Skip the ID value. + if ((Flags & 7) == 2 || (Flags & 7) == 6) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + + if (!Node->isMachineOpcode()) + continue; + const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); + if (!TID.ImplicitDefs) + continue; + for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) + CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + return !LRegs.empty(); +} + + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGRRList::ListScheduleBottomUp() { + unsigned CurCycle = 0; + + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU, CurCycle); + + // Add root to Available queue. + if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue->push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + SmallVector<SUnit*, 4> NotReady; + DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + bool Delayed = false; + LRegsMap.clear(); + SUnit *CurSU = AvailableQueue->pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + Delayed = true; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + NotReady.push_back(CurSU); + CurSU = AvailableQueue->pop(); + } + + // All candidates are delayed due to live physical reg dependencies. + // Try backtracking, code duplication, or inserting cross class copies + // to resolve it. + if (Delayed && !CurSU) { + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + SUnit *TrySU = NotReady[i]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + + // Try unscheduling up to the point where it's safe to schedule + // this node. 
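        // Illustrative scenario (hypothetical cycles): if the register
        // blocking TrySU was made live at cycle 3 and CurCycle is 7, the most
        // recently scheduled nodes are unscheduled one by one until the cycle
        // of the conflicting definition is reached, freeing TrySU to be
        // scheduled without clobbering the live register.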
+ unsigned LiveCycle = CurCycle; + for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { + unsigned Reg = LRegs[j]; + unsigned LCycle = LiveRegCycles[Reg]; + LiveCycle = std::min(LiveCycle, LCycle); + } + SUnit *OldSU = Sequence[LiveCycle]; + if (!WillCreateCycle(TrySU, OldSU)) { + BacktrackBottomUp(TrySU, LiveCycle, CurCycle); + // Force the current node to be scheduled before the node that + // requires the physical reg dep. + if (OldSU->isAvailable) { + OldSU->isAvailable = false; + AvailableQueue->remove(OldSU); + } + AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + // If one or more successors has been unscheduled, then the current + // node is no longer avaialable. Schedule a successor that's now + // available instead. + if (!TrySU->isAvailable) + CurSU = AvailableQueue->pop(); + else { + CurSU = TrySU; + TrySU->isPending = false; + NotReady.erase(NotReady.begin()+i); + } + break; + } + } + + if (!CurSU) { + // Can't backtrack. If it's too expensive to copy the value, then try + // duplicate the nodes that produces these "too expensive to copy" + // values to break the dependency. In case even that doesn't work, + // insert cross class copies. + // If it's not too expensive, i.e. cost != -1, issue copies. + SUnit *TrySU = NotReady[0]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + assert(LRegs.size() == 1 && "Can't handle this yet!"); + unsigned Reg = LRegs[0]; + SUnit *LRDef = LiveRegDefs[Reg]; + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible copy + // the value directly. Do not try duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; + if (!NewDef) { + // Issue copies, these can be expensive cross register class copies. + SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DOUT << "Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"; + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DOUT << "Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"; + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + assert(CurSU && "Unable to resolve live physical register dependencies!"); + } + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + NotReady[i]->isPending = false; + // May no longer be available due to backtracking. + if (NotReady[i]->isAvailable) + AvailableQueue->push(NotReady[i]); + } + NotReady.clear(); + + if (CurSU) + ScheduleNodeBottomUp(CurSU, CurCycle); + ++CurCycle; + } + + // Reverse the order if it is bottom up. 
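  // (Bottom-up scheduling builds Sequence starting from the DAG root, i.e.
  // from the last instruction backwards, so it must be reversed to obtain
  // program order.)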
+ std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG + VerifySchedule(isBottomUp); +#endif +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + --SuccSU->NumPredsLeft; + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { + SuccSU->isAvailable = true; + AvailableQueue->push(SuccSU); + } +} + +void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-tdrr scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, &*I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGRRList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + SUnit *CurSU = AvailableQueue->pop(); + + if (CurSU) + ScheduleNodeTopDown(CurSU, CurCycle); + ++CurCycle; + } + +#ifndef NDEBUG + VerifySchedule(isBottomUp); +#endif +} + + +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Implementation +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// +namespace { + template<class SF> + class RegReductionPriorityQueue; + + /// Sorting functions for the Available queue. 
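  /// (These comparators rank nodes primarily by their Sethi-Ullman numbers,
  /// computed by CalcNodeSethiUllmanNumber below: a node whose non-chain
  /// predecessors have numbers 2 and 2 gets 2 + 1 = 3, while one whose
  /// predecessors have 3 and 1 simply inherits 3. Ties fall through to
  /// secondary heuristics such as closestSucc and calcMaxScratches.)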
+ struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; + bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} + bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; + td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} + td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; +} // end anonymous namespace + +/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. +/// Smaller number is the higher priority. +static unsigned +CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { + unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; + if (SethiUllmanNumber != 0) + return SethiUllmanNumber; + + unsigned Extra = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + SUnit *PredSU = I->getSUnit(); + unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber) + ++Extra; + } + + SethiUllmanNumber += Extra; + + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + + return SethiUllmanNumber; +} + +namespace { + template<class SF> + class VISIBILITY_HIDDEN RegReductionPriorityQueue + : public SchedulingPriorityQueue { + PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; + unsigned currentQueueId; + + protected: + // SUnits - The SUnits for the current graph. + std::vector<SUnit> *SUnits; + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector<unsigned> SethiUllmanNumbers; + + public: + RegReductionPriorityQueue(const TargetInstrInfo *tii, + const TargetRegisterInfo *tri) : + Queue(SF(this)), currentQueueId(0), + TII(tii), TRI(tri), scheduleDAG(NULL) {} + + void initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + } + + void addNode(const SUnit *SU) { + unsigned SUSize = SethiUllmanNumbers.size(); + if (SUnits->size() > SUSize) + SethiUllmanNumbers.resize(SUSize*2, 0); + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); + } + + void updateNode(const SUnit *SU) { + SethiUllmanNumbers[SU->NodeNum] = 0; + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); + } + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + } + + unsigned getNodePriority(const SUnit *SU) const { + assert(SU->NodeNum < SethiUllmanNumbers.size()); + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. 
+ return 0;
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG ||
+ Opc == TargetInstrInfo::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors, so that it doesn't lengthen
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+ return SethiUllmanNumbers[SU->NodeNum];
+ }
+
+ unsigned size() const { return Queue.size(); }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++currentQueueId;
+ Queue.push(U);
+ }
+
+ void push_all(const std::vector<SUnit *> &Nodes) {
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+ push(Nodes[i]);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.top();
+ Queue.pop();
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ Queue.erase_one(SU);
+ SU->NodeQueueId = 0;
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+ };
+
+ typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+ BURegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<td_ls_rr_sort>
+ TDRegReductionPriorityQueue;
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain succs
+ unsigned Height = I->getSUnit()->getHeight();
+ // If there are a bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+ // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready. 
+ // t2 = op c3 + // t4 = op c4 + // + // Then schedule t2 = op first. + // i.e. + // t4 = op c4 + // t2 = op c3 + // t1 = op t2, c1 + // t3 = op t4, c2 + // + // This creates more short live intervals. + unsigned LDist = closestSucc(left); + unsigned RDist = closestSucc(right); + if (LDist != RDist) + return LDist < RDist; + + // How many registers becomes live when the node is scheduled. + unsigned LScratch = calcMaxScratches(left); + unsigned RScratch = calcMaxScratches(right); + if (LScratch != RScratch) + return LScratch > RScratch; + + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +template<class SF> +bool +RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { + if (SU->isTwoAddress) { + unsigned Opc = SU->getNode()->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + unsigned NumRes = TID.getNumDefs(); + unsigned NumOps = TID.getNumOperands() - NumRes; + for (unsigned i = 0; i != NumOps; ++i) { + if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { + SDNode *DU = SU->getNode()->getOperand(i).getNode(); + if (DU->getNodeId() != -1 && + Op->OrigNode == &(*SUnits)[DU->getNodeId()]) + return true; + } + } + } + return false; +} + + +/// hasCopyToRegUse - Return true if SU has a value successor that is a +/// CopyToReg node. +static bool hasCopyToRegUse(const SUnit *SU) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *SuccSU = I->getSUnit(); + if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) + return true; + } + return false; +} + +/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's +/// physical register defs. +static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SDNode *N = SuccSU->getNode(); + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + assert(ImpDefs && "Caller should check hasPhysRegDefs"); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getFlaggedNode()) { + if (!SUNode->isMachineOpcode()) + continue; + const unsigned *SUImpDefs = + TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); + if (!SUImpDefs) + return false; + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned Reg = ImpDefs[i - NumDefs]; + for (;*SUImpDefs; ++SUImpDefs) { + unsigned SUReg = *SUImpDefs; + if (TRI->regsOverlap(Reg, SUReg)) + return true; + } + } + } + return false; +} + +/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses +/// are not handled well by the general register pressure reduction +/// heuristics. When presented with code like this: +/// +/// N +/// / | +/// / | +/// U store +/// | +/// ... +/// +/// the heuristics tend to push the store up, but since the +/// operand of the store has another use (U), this would increase +/// the length of that other use (the U->N edge). 
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+template<class SF>
+void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU->NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU->NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = 0;
+ for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+ EE = SU->Preds.end(); II != EE; ++II)
+ if (!II->isCtrl()) {
+ PredSU = II->getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ SUnit *PredSuccSU = II->getSUnit();
+ if (PredSuccSU == SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph. 
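+ // Reroute each of PredSU's other successors so it depends on SU instead;
+ // SU keeps its own dependence on PredSU, placing it between PredSU and
+ // those users.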
+ DOUT << "Prescheduling SU # " << SU->NodeNum + << " next to PredSU # " << PredSU->NodeNum + << " to guide scheduling in the presence of multiple uses\n"; + for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { + SDep Edge = PredSU->Succs[i]; + assert(!Edge.isAssignedRegDep()); + SUnit *SuccSU = Edge.getSUnit(); + if (SuccSU != SU) { + Edge.setSUnit(PredSU); + scheduleDAG->RemovePred(SuccSU, Edge); + scheduleDAG->AddPred(SU, Edge); + Edge.setSUnit(SU); + scheduleDAG->AddPred(SuccSU, Edge); + --i; + } + } + outer_loop_continue:; + } +} + +/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses +/// it as a def&use operand. Add a pseudo control edge from it to the other +/// node (if it won't create a cycle) so the two-address one will be scheduled +/// first (lower in the schedule). If both nodes are two-address, favor the +/// one that has a CopyToReg use (more likely to be a loop induction update). +/// If both are two-address, but one is commutable while the other is not +/// commutable, favor the one that's not commutable. +template<class SF> +void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + if (!SU->isTwoAddress) + continue; + + SDNode *Node = SU->getNode(); + if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) + continue; + + unsigned Opc = Node->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + unsigned NumRes = TID.getNumDefs(); + unsigned NumOps = TID.getNumOperands() - NumRes; + for (unsigned j = 0; j != NumOps; ++j) { + if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) + continue; + SDNode *DU = SU->getNode()->getOperand(j).getNode(); + if (DU->getNodeId() == -1) + continue; + const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; + if (!DUSU) continue; + for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), + E = DUSU->Succs.end(); I != E; ++I) { + if (I->isCtrl()) continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU == SU) + continue; + // Be conservative. Ignore if nodes aren't at roughly the same + // depth and height. + if (SuccSU->getHeight() < SU->getHeight() && + (SU->getHeight() - SuccSU->getHeight()) > 1) + continue; + // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge + // constrains whatever is using the copy, instead of the copy + // itself. In the case that the copy is coalesced, this + // preserves the intent of the pseudo two-address heurietics. + while (SuccSU->Succs.size() == 1 && + SuccSU->getNode()->isMachineOpcode() && + SuccSU->getNode()->getMachineOpcode() == + TargetInstrInfo::COPY_TO_REGCLASS) + SuccSU = SuccSU->Succs.front().getSUnit(); + // Don't constrain non-instruction nodes. + if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) + continue; + // Don't constrain nodes with physical register defs if the + // predecessor can clobber them. + if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { + if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) + continue; + } + // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; + // these may be coalesced away. We want them close to their uses. 
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); + if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || + SuccOpc == TargetInstrInfo::INSERT_SUBREG || + SuccOpc == TargetInstrInfo::SUBREG_TO_REG) + continue; + if ((!canClobber(SuccSU, DUSU) || + (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || + (!SU->isCommutable && SuccSU->isCommutable)) && + !scheduleDAG->IsReachable(SuccSU, SU)) { + DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum + << " to SU #" << SuccSU->NodeNum << "\n"; + scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + } + } + } + } +} + +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. +template<class SF> +void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { + SethiUllmanNumbers.assign(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); +} + +/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled +/// predecessors of the successors of the SUnit SU. Stop when the provided +/// limit is exceeded. +static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, + unsigned Limit) { + unsigned Sum = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + const SUnit *SuccSU = I->getSUnit(); + for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), + EE = SuccSU->Preds.end(); II != EE; ++II) { + SUnit *PredSU = II->getSUnit(); + if (!PredSU->isScheduled) + if (++Sum > Limit) + return Sum; + } + } + return Sum; +} + + +// Top down +bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LPriority = SPQ->getNodePriority(left); + unsigned RPriority = SPQ->getNodePriority(right); + bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); + bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); + bool LIsFloater = LIsTarget && left->NumPreds == 0; + bool RIsFloater = RIsTarget && right->NumPreds == 0; + unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; + unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 
2 : 0; + + if (left->NumSuccs == 0 && right->NumSuccs != 0) + return false; + else if (left->NumSuccs != 0 && right->NumSuccs == 0) + return true; + + if (LIsFloater) + LBonus -= 2; + if (RIsFloater) + RBonus -= 2; + if (left->NumSuccs == 1) + LBonus += 2; + if (right->NumSuccs == 1) + RBonus += 2; + + if (LPriority+LBonus != RPriority+RBonus) + return LPriority+LBonus < RPriority+RBonus; + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + + if (left->NumSuccsLeft != right->NumSuccsLeft) + return left->NumSuccsLeft > right->NumSuccsLeft; + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); + + ScheduleDAGRRList *SD = + new ScheduleDAGRRList(*IS->MF, true, PQ); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); + + ScheduleDAGRRList *SD = + new ScheduleDAGRRList(*IS->MF, false, PQ); + PQ->setScheduleDAG(SD); + return SD; +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp new file mode 100644 index 000000000000..7aa15bcc6862 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -0,0 +1,294 @@ +//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG class, which is a base class used by +// scheduling implementation classes. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) + : ScheduleDAG(mf) { +} + +/// Run - perform scheduling. 
+/// +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, + MachineBasicBlock::iterator insertPos) { + DAG = dag; + ScheduleDAG::Run(bb, insertPos); +} + +SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { + SUnit *SU = NewSUnit(Old->getNode()); + SU->OrigNode = Old->OrigNode; + SU->Latency = Old->Latency; + SU->isTwoAddress = Old->isTwoAddress; + SU->isCommutable = Old->isCommutable; + SU->hasPhysRegDefs = Old->hasPhysRegDefs; + SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + Old->isCloned = true; + return SU; +} + +/// CheckForPhysRegDependency - Check if the dependency between def and use of +/// a specified operand is a physical register dependency. If so, returns the +/// register and the cost of copying the register. +static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + unsigned &PhysReg, int &Cost) { + if (Op != 2 || User->getOpcode() != ISD::CopyToReg) + return; + + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return; + + unsigned ResNo = User->getOperand(2).getResNo(); + if (Def->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); + if (ResNo >= II.getNumDefs() && + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { + PhysReg = Reg; + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); + Cost = RC->getCopyCost(); + } + } +} + +void ScheduleDAGSDNodes::BuildSchedUnits() { + // During scheduling, the NodeId field of SDNode is used to map SDNodes + // to their associated SUnits by holding SUnits table indices. A value + // of -1 means the SDNode does not yet have an associated SUnit. + unsigned NumNodes = 0; + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), + E = DAG->allnodes_end(); NI != E; ++NI) { + NI->setNodeId(-1); + ++NumNodes; + } + + // Reserve entries in the vector for each of the SUnits we are creating. This + // ensure that reallocation of the vector won't happen, so SUnit*'s won't get + // invalidated. + // FIXME: Multiply by 2 because we may clone nodes during scheduling. + // This is a temporary workaround. + SUnits.reserve(NumNodes * 2); + + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = ForceUnitLatencies(); + + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), + E = DAG->allnodes_end(); NI != E; ++NI) { + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. + continue; + + // If this node has already been processed, stop now. + if (NI->getNodeId() != -1) continue; + + SUnit *NodeSUnit = NewSUnit(NI); + + // See if anything is flagged to this node, if so, add them to flagged + // nodes. Nodes can have at most one flag input and one flag output. Flags + // are required to be the last operand and result of a node. + + // Scan up to find flagged preds. + SDNode *N = NI; + while (N->getNumOperands() && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { + N = N->getOperand(N->getNumOperands()-1).getNode(); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + } + + // Scan down to find any flagged succs. + N = NI; + while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { + SDValue FlagVal(N, N->getNumValues()-1); + + // There are either zero or one users of the Flag result. 
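+ // If such a use exists, absorb it into this SUnit and continue walking
+ // down the flag chain from the user.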
+ bool HasFlagUse = false; + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) + if (FlagVal.isOperandOf(*UI)) { + HasFlagUse = true; + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + N = *UI; + break; + } + if (!HasFlagUse) break; + } + + // If there are flag operands involved, N is now the bottom-most node + // of the sequence of nodes that are flagged together. + // Update the SUnit. + NodeSUnit->setNode(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + + // Assign the Latency field of NodeSUnit using target-provided information. + if (UnitLatencies) + NodeSUnit->Latency = 1; + else + ComputeLatency(NodeSUnit); + } +} + +void ScheduleDAGSDNodes::AddSchedEdges() { + // Pass 2: add the preds, succs, etc. + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + SUnit *SU = &SUnits[su]; + SDNode *MainNode = SU->getNode(); + + if (MainNode->isMachineOpcode()) { + unsigned Opc = MainNode->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + SU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + SU->isCommutable = true; + } + + // Find all predecessors and successors of the group. + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) { + if (N->isMachineOpcode() && + TII->get(N->getMachineOpcode()).getImplicitDefs()) { + SU->hasPhysRegClobbers = true; + unsigned NumUsed = CountResults(N); + while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) + --NumUsed; // Skip over unused values at the end. + if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) + SU->hasPhysRegDefs = true; + } + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDNode *OpN = N->getOperand(i).getNode(); + if (isPassiveNode(OpN)) continue; // Not scheduled. + SUnit *OpSU = &SUnits[OpN->getNodeId()]; + assert(OpSU && "Node has no SUnit!"); + if (OpSU == SU) continue; // In the same group. + + MVT OpVT = N->getOperand(i).getValueType(); + assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); + bool isChain = OpVT == MVT::Other; + + unsigned PhysReg = 0; + int Cost = 1; + // Determine if this is a physical register dependency. + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); + assert((PhysReg == 0 || !isChain) && + "Chain dependence via physreg data?"); + // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler + // emits a copy from the physical register to a virtual register unless + // it requires a cross class copy (cost < 0). That means we are only + // treating "expensive to copy" register dependency as physical register + // dependency. This may change in the future though. + if (Cost >= 0) + PhysReg = 0; + SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpSU->Latency, PhysReg)); + } + } + } +} + +/// BuildSchedGraph - Build the SUnit graph from the selection dag that we +/// are input. This SUnit graph is similar to the SelectionDAG, but +/// excludes nodes that aren't interesting to scheduling, and represents +/// flagged together nodes with a single SUnit. +void ScheduleDAGSDNodes::BuildSchedGraph() { + // Populate the SUnits array. + BuildSchedUnits(); + // Compute all the scheduling dependencies between nodes. 
+ AddSchedEdges(); +} + +void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + + // Compute the latency for the node. We use the sum of the latencies for + // all nodes flagged together into this SUnit. + SU->Latency = 0; + bool SawMachineOpcode = false; + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) + if (N->isMachineOpcode()) { + SawMachineOpcode = true; + SU->Latency += + InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass()); + } +} + +/// CountResults - The results of target nodes have register or immediate +/// operands first, then an optional chain, and optional flag operands (which do +/// not go into the resulting MachineInstr). +unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) { + unsigned N = Node->getNumValues(); + while (N && Node->getValueType(N - 1) == MVT::Flag) + --N; + if (N && Node->getValueType(N - 1) == MVT::Other) + --N; // Skip over chain result. + return N; +} + +/// CountOperands - The inputs to target nodes have any actual inputs first, +/// followed by special operands that describe memory references, then an +/// optional chain operand, then an optional flag operand. Compute the number +/// of actual operands that will go into the resulting MachineInstr. +unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) { + unsigned N = ComputeMemOperandsEnd(Node); + while (N && isa<MemOperandSDNode>(Node->getOperand(N - 1).getNode())) + --N; // Ignore MEMOPERAND nodes + return N; +} + +/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode +/// operand +unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) { + unsigned N = Node->getNumOperands(); + while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) + --N; + if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) + --N; // Ignore chain if it exists. + return N; +} + + +void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { + if (!SU->getNode()) { + cerr << "PHYS REG COPY\n"; + return; + } + + SU->getNode()->dump(DAG); + cerr << "\n"; + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + cerr << " "; + FlaggedNodes.back()->dump(DAG); + cerr << "\n"; + FlaggedNodes.pop_back(); + } +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h new file mode 100644 index 000000000000..2a278b749a8c --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -0,0 +1,179 @@ +//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGSDNodes class, which implements +// scheduling for an SDNode-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef SCHEDULEDAGSDNODES_H +#define SCHEDULEDAGSDNODES_H + +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAG.h" + +namespace llvm { + /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. 
+ /// + /// Edges between SUnits are initially based on edges in the SelectionDAG, + /// and additional edges can be added by the schedulers as heuristics. + /// SDNodes such as Constants, Registers, and a few others that are not + /// interesting to schedulers are not allocated SUnits. + /// + /// SDNodes with MVT::Flag operands are grouped along with the flagged + /// nodes into a single SUnit so that they are scheduled together. + /// + /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output + /// edges. Physical register dependence information is not carried in + /// the DAG and must be handled explicitly by schedulers. + /// + class ScheduleDAGSDNodes : public ScheduleDAG { + public: + SelectionDAG *DAG; // DAG of the current basic block + + explicit ScheduleDAGSDNodes(MachineFunction &mf); + + virtual ~ScheduleDAGSDNodes() {} + + /// Run - perform scheduling. + /// + void Run(SelectionDAG *dag, MachineBasicBlock *bb, + MachineBasicBlock::iterator insertPos); + + /// isPassiveNode - Return true if the node is a non-scheduled leaf. + /// + static bool isPassiveNode(SDNode *Node) { + if (isa<ConstantSDNode>(Node)) return true; + if (isa<ConstantFPSDNode>(Node)) return true; + if (isa<RegisterSDNode>(Node)) return true; + if (isa<GlobalAddressSDNode>(Node)) return true; + if (isa<BasicBlockSDNode>(Node)) return true; + if (isa<FrameIndexSDNode>(Node)) return true; + if (isa<ConstantPoolSDNode>(Node)) return true; + if (isa<JumpTableSDNode>(Node)) return true; + if (isa<ExternalSymbolSDNode>(Node)) return true; + if (isa<MemOperandSDNode>(Node)) return true; + if (Node->getOpcode() == ISD::EntryToken) return true; + return false; + } + + /// NewSUnit - Creates a new SUnit and return a ptr to it. + /// + SUnit *NewSUnit(SDNode *N) { +#ifndef NDEBUG + const SUnit *Addr = 0; + if (!SUnits.empty()) + Addr = &SUnits[0]; +#endif + SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); + assert((Addr == 0 || Addr == &SUnits[0]) && + "SUnits std::vector reallocated on the fly!"); + SUnits.back().OrigNode = &SUnits.back(); + return &SUnits.back(); + } + + /// Clone - Creates a clone of the specified SUnit. It does not copy the + /// predecessors / successors info nor the temporary scheduling states. + /// + SUnit *Clone(SUnit *N); + + /// BuildSchedGraph - Build the SUnit graph from the selection dag that we + /// are input. This SUnit graph is similar to the SelectionDAG, but + /// excludes nodes that aren't interesting to scheduling, and represents + /// flagged together nodes with a single SUnit. + virtual void BuildSchedGraph(); + + /// ComputeLatency - Compute node latency. + /// + virtual void ComputeLatency(SUnit *SU); + + /// CountResults - The results of target nodes have register or immediate + /// operands first, then an optional chain, and optional flag operands + /// (which do not go into the machine instrs.) + static unsigned CountResults(SDNode *Node); + + /// CountOperands - The inputs to target nodes have any actual inputs first, + /// followed by special operands that describe memory references, then an + /// optional chain operand, then flag operands. Compute the number of + /// actual operands that will go into the resulting MachineInstr. + static unsigned CountOperands(SDNode *Node); + + /// ComputeMemOperandsEnd - Find the index one past the last + /// MemOperandSDNode operand + static unsigned ComputeMemOperandsEnd(SDNode *Node); + + /// EmitNode - Generate machine code for an node and needed dependencies. 
+ /// VRBaseMap contains, for each already emitted node, the first virtual + /// register number for the results of the node. + /// + void EmitNode(SDNode *Node, bool IsClone, bool HasClone, + DenseMap<SDValue, unsigned> &VRBaseMap); + + virtual MachineBasicBlock *EmitSchedule(); + + /// Schedule - Order nodes according to selected style, filling + /// in the Sequence member. + /// + virtual void Schedule() = 0; + + virtual void dumpNode(const SUnit *SU) const; + + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + + virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; + + private: + /// EmitSubregNode - Generate machine code for subreg nodes. + /// + void EmitSubregNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS + /// nodes. + /// + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getVR - Return the virtual register corresponding to the specified result + /// of the specified node. + unsigned getVR(SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getDstOfCopyToRegUse - If the only use of the specified result number of + /// node is a CopyToReg, return its destination register. Return 0 otherwise. + unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const; + + void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// AddRegisterOperand - Add the specified register as an operand to the + /// specified machine instr. Insert register copies if the register is + /// not in the required register class. + void AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an + /// implicit physical register output. + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, + bool IsCloned, unsigned SrcReg, + DenseMap<SDValue, unsigned> &VRBaseMap); + + void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, bool IsClone, + bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. + void BuildSchedUnits(); + void AddSchedEdges(); + }; +} + +#endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp new file mode 100644 index 000000000000..fb5e207e81bb --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp @@ -0,0 +1,668 @@ +//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the ScheduleDAG class, which creates +// MachineInstrs according to the computed schedule. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an +/// implicit physical register output. +void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo, + bool IsClone, bool IsCloned, + unsigned SrcReg, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VRBase = 0; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Just use the input register directly! + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + return; + } + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + bool MatchReg = true; + const TargetRegisterClass *UseRC = NULL; + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + MVT VT = Node->getValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Flag) + continue; + Match = false; + if (User->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = + getInstrOperandRegClass(TRI, II, i+II.getNumDefs()); + if (!UseRC) + UseRC = RC; + else if (RC) { + if (UseRC->hasSuperClass(RC)) + UseRC = RC; + else + assert((UseRC == RC || RC->hasSuperClass(UseRC)) && + "Multiple uses expecting different register classes!"); + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + MVT VT = Node->getValueType(ResNo); + const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI.getRegClass(VRBase); + } else if (UseRC) { + assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. 
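+ // The new virtual register takes the destination class computed above;
+ // copyRegToReg is expected to find a suitable copy instruction for the
+ // class pair (the assert below catches failure).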
+ VRBase = MRI.createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg, + DstRC, SrcRC); + + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// getDstOfCopyToRegUse - If the only use of the specified result number of +/// node is a CopyToReg, return its destination register. Return 0 otherwise. +unsigned ScheduleDAGSDNodes::getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const { + if (!Node->hasOneUse()) + return 0; + + SDNode *User = *Node->use_begin(); + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return Reg; + } + return 0; +} + +void ScheduleDAGSDNodes::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, II, i); + + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. + if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI.createVirtualRegister(RC); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned ScheduleDAGSDNodes::getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. + unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); + // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // does not include operand register class info. 
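+ // With no CopyToReg use to supply a destination register, fall back to
+ // the register class the target associates with the value type.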
+ if (!VReg) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); + VReg = MRI.createVirtualRegister(RC); + } + BuildMI(BB, Op.getDebugLoc(), TII->get(TargetInstrInfo::IMPLICIT_DEF),VReg); + return VReg; + } + + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. Insert register copies if the register is +/// not in the required register class. +void +ScheduleDAGSDNodes::AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + + const TargetInstrDesc &TID = MI->getDesc(); + bool isOptDef = IIOpNum < TID.getNumOperands() && + TID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it. + if (II) { + const TargetRegisterClass *SrcRC = + MRI.getRegClass(VReg); + const TargetRegisterClass *DstRC = + getInstrOperandRegClass(TRI, *II, IIOpNum); + assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + "Don't have operand info for this instruction!"); + if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { + unsigned NewVReg = MRI.createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + VReg = NewVReg; + } + } + + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. IIOpNum and II are used for +/// assertions only. 
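+/// Operands that are not virtual-register values (immediates, FP constants,
+/// explicit registers, globals, basic blocks, frame indices, jump tables,
+/// constant-pool entries, and external symbols) are translated directly into
+/// the corresponding MachineOperand kinds.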
+void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode()) { + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateImm(C->getZExtValue())); + } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { + const ConstantFP *CFP = F->getConstantFPValue(); + MI->addOperand(MachineOperand::CreateFPImm(CFP)); + } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(),TGA->getOffset())); + } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock())); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateFI(FI->getIndex())); + } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateJTI(JT->getIndex())); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { + int Offset = CP->getOffset(); + unsigned Align = CP->getAlignment(); + const Type *Type = CP->getType(); + // MachineConstantPool wants an explicit alignment. + if (Align == 0) { + Align = TM.getTargetData()->getPrefTypeAlignment(Type); + if (Align == 0) { + // Alignment of vector types. FIXME! + Align = TM.getTargetData()->getTypeAllocSize(Type); + } + } + + unsigned Idx; + if (CP->isMachineConstantPoolEntry()) + Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align); + else + Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align); + MI->addOperand(MachineOperand::CreateCPI(Idx, Offset)); + } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateES(ES->getSymbol())); + } else { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } +} + +/// getSuperRegisterRegClass - Returns the register class of a superreg A whose +/// "SubIdx"'th sub-register class is the specified register class and whose +/// type matches the specified type. +static const TargetRegisterClass* +getSuperRegisterRegClass(const TargetRegisterClass *TRC, + unsigned SubIdx, MVT VT) { + // Pick the register class of the superegister for this type + for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(), + E = TRC->superregclasses_end(); I != E; ++I) + if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC) + return *I; + assert(false && "Couldn't find the register class"); + return 0; +} + +/// EmitSubregNode - Generate machine code for subreg nodes. +/// +void ScheduleDAGSDNodes::EmitSubregNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VRBase = 0; + unsigned Opc = Node->getMachineOpcode(); + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. 
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + if (Opc == TargetInstrInfo::EXTRACT_SUBREG) { + unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::EXTRACT_SUBREG)); + + // Figure out the register class to create for the destreg. + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *TRC = MRI.getRegClass(VReg); + const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); + assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI.createVirtualRegister(SRC); + } + + // Add def, source, and subreg index + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + BB->insert(InsertPos, MI); + } else if (Opc == TargetInstrInfo::INSERT_SUBREG || + Opc == TargetInstrInfo::SUBREG_TO_REG) { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + unsigned SubReg = getVR(N1, VRBaseMap); + unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); + const TargetRegisterClass *TRC = MRI.getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, + Node->getValueType(0)); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI.createVirtualRegister(SRC); + } + + // Create the insert_subreg or subreg_to_reg machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), TII->get(Opc)); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + + // If creating a subreg_to_reg, then the first input operand + // is an implicit value immediate, otherwise it's a register + if (Opc == TargetInstrInfo::SUBREG_TO_REG) { + const ConstantSDNode *SD = cast<ConstantSDNode>(N0); + MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); + } else + AddOperand(MI, N0, 0, 0, VRBaseMap); + // Add the subregster being inserted + AddOperand(MI, N1, 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + BB->insert(InsertPos, MI); + } else + assert(0 && "Node is not insert_subreg, extract_subreg, or subreg_to_reg"); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. 
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+ScheduleDAGSDNodes::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(VReg);
+
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg,
+ DstRC, SrcRC);
+ assert(Emitted &&
+ "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
+ (void) Emitted;
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitNode - Generate machine code for a node and needed dependencies.
+///
+void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ // If machine instruction
+ if (Node->isMachineOpcode()) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+ Opc == TargetInstrInfo::INSERT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ if (Opc == TargetInstrInfo::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const TargetInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NodeOperands = CountOperands(Node);
+ unsigned MemOperandsEnd = ComputeMemOperandsEnd(Node);
+ bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
+ II.getImplicitDefs() != 0;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ assert((II.getNumOperands() == NumMIOperands ||
+ HasPhysRegOuts || II.isVariadic()) &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ for (unsigned i = 0; i != NodeOperands; ++i)
+ AddOperand(MI, Node->getOperand(i), i+II.getNumDefs(), &II, VRBaseMap);
+
+ // Emit all of the memory operands of this instruction
+ for (unsigned i = NodeOperands; i != MemOperandsEnd; ++i)
+ AddMemOperand(MI, cast<MemOperandSDNode>(Node->getOperand(i))->MO);
+
+ if (II.usesCustomDAGSchedInsertionHook()) {
+ // Insert this instruction into the basic block using a target
+ // specific inserter which may return a new basic block.
+ BB = TLI->EmitInstrWithCustomInserter(MI, BB);
+ InsertPos = BB->end();
+ } else {
+ BB->insert(InsertPos, MI);
+ }
+
+ // Additional results must be physical register defs. 
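+ // For each implicit-def result that is actually used, EmitCopyFromReg
+ // either copies the value into a virtual register or records the physical
+ // register directly when copying it would be too expensive.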
+ if (HasPhysRegOuts) { + for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + if (Node->hasAnyUseOfValue(i)) + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + } + } + return; + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(DAG); +#endif + assert(0 && "This target-independent node should have been selected!"); + break; + case ISD::EntryToken: + assert(0 && "EntryToken should have been excluded from the schedule!"); + break; + case ISD::TokenFactor: // fall thru + break; + case ISD::CopyToReg: { + unsigned SrcReg; + SDValue SrcVal = Node->getOperand(2); + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) + SrcReg = R->getReg(); + else + SrcReg = getVR(SrcVal, VRBaseMap); + + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + if (SrcReg == DestReg) // Coalesced away the copy? Ignore. + break; + + const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0; + // Get the register classes of the src/dst. + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + SrcTRC = MRI.getRegClass(SrcReg); + else + SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType()); + + if (TargetRegisterInfo::isVirtualRegister(DestReg)) + DstTRC = MRI.getRegClass(DestReg); + else + DstTRC = TRI->getPhysicalRegisterRegClass(DestReg, + Node->getOperand(1).getValueType()); + + bool Emitted = TII->copyRegToReg(*BB, InsertPos, DestReg, SrcReg, + DstTRC, SrcTRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + break; + } + case ISD::CopyFromReg: { + unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + break; + } + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + // Create the inline asm machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::INLINEASM)); + + // Add the asm string as an external symbol operand. + const char *AsmStr = + cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol(); + MI->addOperand(MachineOperand::CreateES(AsmStr)); + + // Add all of the operand registers to the instruction. + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + MI->addOperand(MachineOperand::CreateImm(Flags)); + ++i; // Skip the ID value. + + switch (Flags & 7) { + default: assert(0 && "Bad flags!"); + case 2: // Def of register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + break; + case 6: // Def of earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, + false, 0, true)); + } + break; + case 1: // Use of register. + case 3: // Immediate. + case 4: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (; NumVals; --NumVals, ++i) + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap); + break; + } + } + BB->insert(InsertPos, MI); + break; + } + } +} + +/// EmitSchedule - Emit the machine code in scheduled order. 
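+/// Null entries in the schedule become no-ops, SUnits without an SDNode
+/// become physical register copies, and for all other SUnits the flagged
+/// (glued) nodes are emitted before the SUnit's own node.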
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { + DenseMap<SDValue, unsigned> VRBaseMap; + DenseMap<SUnit*, unsigned> CopyVRBaseMap; + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + EmitNoop(); + continue; + } + + // For pre-regalloc scheduling, create instructions corresponding to the + // SDNode and any flagged SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. + EmitPhysRegCopy(SU, CopyVRBaseMap); + continue; + } + + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; + N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,VRBaseMap); + FlaggedNodes.pop_back(); + } + EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); + } + + return BB; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp new file mode 100644 index 000000000000..195896ee89dc --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -0,0 +1,5743 @@ +//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG class. +// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <cmath> +using namespace llvm; + +/// makeVTList - Return an instance of the SDVTList struct initialized with the +/// specified members. 
+static SDVTList makeVTList(const MVT *VTs, unsigned NumVTs) { + SDVTList Res = {VTs, NumVTs}; + return Res; +} + +static const fltSemantics *MVTToAPFloatSemantics(MVT VT) { + switch (VT.getSimpleVT()) { + default: assert(0 && "Unknown FP format"); + case MVT::f32: return &APFloat::IEEEsingle; + case MVT::f64: return &APFloat::IEEEdouble; + case MVT::f80: return &APFloat::x87DoubleExtended; + case MVT::f128: return &APFloat::IEEEquad; + case MVT::ppcf128: return &APFloat::PPCDoubleDouble; + } +} + +SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {} + +//===----------------------------------------------------------------------===// +// ConstantFPSDNode Class +//===----------------------------------------------------------------------===// + +/// isExactlyValue - We don't rely on operator== working on double values, as +/// it returns true for things that are clearly not equal, like -0.0 and 0.0. +/// As such, this method can be used to do an exact bit-for-bit comparison of +/// two floating point values. +bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { + return getValueAPF().bitwiseIsEqual(V); +} + +bool ConstantFPSDNode::isValueValidForType(MVT VT, + const APFloat& Val) { + assert(VT.isFloatingPoint() && "Can only convert between FP types"); + + // PPC long double cannot be converted to any other type. + if (VT == MVT::ppcf128 || + &Val.getSemantics() == &APFloat::PPCDoubleDouble) + return false; + + // convert modifies in place, so make a copy. + APFloat Val2 = APFloat(Val); + bool losesInfo; + (void) Val2.convert(*MVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + &losesInfo); + return !losesInfo; +} + +//===----------------------------------------------------------------------===// +// ISD Namespace +//===----------------------------------------------------------------------===// + +/// isBuildVectorAllOnes - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are ~0 or undef. +bool ISD::isBuildVectorAllOnes(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BIT_CONVERT) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. + SDValue NotZero = N->getOperand(i); + if (isa<ConstantSDNode>(NotZero)) { + if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue()) + return false; + } else if (isa<ConstantFPSDNode>(NotZero)) { + if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF(). + bitcastToAPInt().isAllOnesValue()) + return false; + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != NotZero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + + +/// isBuildVectorAllZeros - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are 0 or undef. +bool ISD::isBuildVectorAllZeros(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BIT_CONVERT) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. 
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. + SDValue Zero = N->getOperand(i); + if (isa<ConstantSDNode>(Zero)) { + if (!cast<ConstantSDNode>(Zero)->isNullValue()) + return false; + } else if (isa<ConstantFPSDNode>(Zero)) { + if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero()) + return false; + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != Zero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + +/// isScalarToVector - Return true if the specified node is a +/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low +/// element is not an undef. +bool ISD::isScalarToVector(const SDNode *N) { + if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) + return true; + + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + if (N->getOperand(0).getOpcode() == ISD::UNDEF) + return false; + unsigned NumElems = N->getNumOperands(); + for (unsigned i = 1; i < NumElems; ++i) { + SDValue V = N->getOperand(i); + if (V.getOpcode() != ISD::UNDEF) + return false; + } + return true; +} + + +/// isDebugLabel - Return true if the specified node represents a debug +/// label (i.e. ISD::DBG_LABEL or TargetInstrInfo::DBG_LABEL node). +bool ISD::isDebugLabel(const SDNode *N) { + SDValue Zero; + if (N->getOpcode() == ISD::DBG_LABEL) + return true; + if (N->isMachineOpcode() && + N->getMachineOpcode() == TargetInstrInfo::DBG_LABEL) + return true; + return false; +} + +/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) +/// when given the operation for (X op Y). +ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { + // To perform this operation, we just need to swap the L and G bits of the + // operation. + unsigned OldL = (Operation >> 2) & 1; + unsigned OldG = (Operation >> 1) & 1; + return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits + (OldL << 1) | // New G bit + (OldG << 2)); // New L bit. +} + +/// getSetCCInverse - Return the operation corresponding to !(X op Y), where +/// 'op' is a valid SetCC operation. +ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { + unsigned Operation = Op; + if (isInteger) + Operation ^= 7; // Flip L, G, E bits, but not U. + else + Operation ^= 15; // Flip all of the condition bits. + + if (Operation > ISD::SETTRUE2) + Operation &= ~8; // Don't let N and U bits get set. + + return ISD::CondCode(Operation); +} + + +/// isSignedOp - For an integer comparison, return 1 if the comparison is a +/// signed operation and 2 if the result is an unsigned comparison. Return zero +/// if the operation does not depend on the sign of the input (setne and seteq). +static int isSignedOp(ISD::CondCode Opcode) { + switch (Opcode) { + default: assert(0 && "Illegal integer setcc operation!"); + case ISD::SETEQ: + case ISD::SETNE: return 0; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return 1; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return 2; + } +} + +/// getSetCCOrOperation - Return the result of a logical OR between different +/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). 
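+/// (CondCode values are bit masks over the E, G, L, U and N bits, so OR-ing
+/// two codes yields a code that tests the union of their relations.)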
This function +/// returns SETCC_INVALID if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed integer setcc with an unsigned integer setcc. + return ISD::SETCC_INVALID; + + unsigned Op = Op1 | Op2; // Combine all of the condition bits. + + // If the N and U bits get set then the resultant comparison DOES suddenly + // care about orderedness, and is true when ordered. + if (Op > ISD::SETTRUE2) + Op &= ~16; // Clear the U bit if the N bit is set. + + // Canonicalize illegal integer setcc's. + if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT + Op = ISD::SETNE; + + return ISD::CondCode(Op); +} + +/// getSetCCAndOperation - Return the result of a logical AND between different +/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This +/// function returns zero if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed setcc with an unsigned setcc. + return ISD::SETCC_INVALID; + + // Combine all of the condition bits. + ISD::CondCode Result = ISD::CondCode(Op1 & Op2); + + // Canonicalize illegal integer setcc's. + if (isInteger) { + switch (Result) { + default: break; + case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT + case ISD::SETOEQ: // SETEQ & SETU[LG]E + case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE + case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE + case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE + } + } + + return Result; +} + +const TargetMachine &SelectionDAG::getTarget() const { + return MF->getTarget(); +} + +//===----------------------------------------------------------------------===// +// SDNode Profile Support +//===----------------------------------------------------------------------===// + +/// AddNodeIDOpcode - Add the node opcode to the NodeID data. +/// +static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { + ID.AddInteger(OpC); +} + +/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them +/// solely with their pointer. +static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { + ID.AddPointer(VTList.VTs); +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDValue *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDUse *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +static void AddNodeIDNode(FoldingSetNodeID &ID, + unsigned short OpC, SDVTList VTList, + const SDValue *OpList, unsigned N) { + AddNodeIDOpcode(ID, OpC); + AddNodeIDValueTypes(ID, VTList); + AddNodeIDOperands(ID, OpList, N); +} + +/// AddNodeIDCustom - If this is an SDNode with special info, add this info to +/// the NodeID data. 
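+/// Including this out-of-band payload (constant values, global addresses,
+/// memory operands, shuffle masks, and so on) is what lets such nodes be
+/// CSE'd correctly.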
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { + switch (N->getOpcode()) { + default: break; // Normal nodes don't need extra info. + case ISD::ARG_FLAGS: + ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits()); + break; + case ISD::TargetConstant: + case ISD::Constant: + ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue()); + break; + case ISD::TargetConstantFP: + case ISD::ConstantFP: { + ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); + break; + } + case ISD::TargetGlobalAddress: + case ISD::GlobalAddress: + case ISD::TargetGlobalTLSAddress: + case ISD::GlobalTLSAddress: { + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + ID.AddPointer(GA->getGlobal()); + ID.AddInteger(GA->getOffset()); + break; + } + case ISD::BasicBlock: + ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock()); + break; + case ISD::Register: + ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); + break; + case ISD::DBG_STOPPOINT: { + const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(N); + ID.AddInteger(DSP->getLine()); + ID.AddInteger(DSP->getColumn()); + ID.AddPointer(DSP->getCompileUnit()); + break; + } + case ISD::SRCVALUE: + ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); + break; + case ISD::MEMOPERAND: { + const MachineMemOperand &MO = cast<MemOperandSDNode>(N)->MO; + MO.Profile(ID); + break; + } + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); + break; + case ISD::JumpTable: + case ISD::TargetJumpTable: + ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); + break; + case ISD::ConstantPool: + case ISD::TargetConstantPool: { + const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); + ID.AddInteger(CP->getAlignment()); + ID.AddInteger(CP->getOffset()); + if (CP->isMachineConstantPoolEntry()) + CP->getMachineCPVal()->AddSelectionDAGCSEId(ID); + else + ID.AddPointer(CP->getConstVal()); + break; + } + case ISD::CALL: { + const CallSDNode *Call = cast<CallSDNode>(N); + ID.AddInteger(Call->getCallingConv()); + ID.AddInteger(Call->isVarArg()); + break; + } + case ISD::LOAD: { + const LoadSDNode *LD = cast<LoadSDNode>(N); + ID.AddInteger(LD->getMemoryVT().getRawBits()); + ID.AddInteger(LD->getRawSubclassData()); + break; + } + case ISD::STORE: { + const StoreSDNode *ST = cast<StoreSDNode>(N); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + break; + } + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: { + const AtomicSDNode *AT = cast<AtomicSDNode>(N); + ID.AddInteger(AT->getMemoryVT().getRawBits()); + ID.AddInteger(AT->getRawSubclassData()); + break; + } + case ISD::VECTOR_SHUFFLE: { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); + i != e; ++i) + ID.AddInteger(SVN->getMaskElt(i)); + break; + } + } // end switch (N->getOpcode()) +} + +/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID +/// data. +static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { + AddNodeIDOpcode(ID, N->getOpcode()); + // Add the return value info. + AddNodeIDValueTypes(ID, N->getVTList()); + // Add the operand info. 
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands()); + + // Handle SDNode leafs with special info. + AddNodeIDCustom(ID, N); +} + +/// encodeMemSDNodeFlags - Generic routine for computing a value for use in +/// the CSE map that carries alignment, volatility, indexing mode, and +/// extension/truncation information. +/// +static inline unsigned +encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, + bool isVolatile, unsigned Alignment) { + assert((ConvType & 3) == ConvType && + "ConvType may not require more than 2 bits!"); + assert((AM & 7) == AM && + "AM may not require more than 3 bits!"); + return ConvType | + (AM << 2) | + (isVolatile << 5) | + ((Log2_32(Alignment) + 1) << 6); +} + +//===----------------------------------------------------------------------===// +// SelectionDAG Class +//===----------------------------------------------------------------------===// + +/// doNotCSE - Return true if CSE should not be performed for this node. +static bool doNotCSE(SDNode *N) { + if (N->getValueType(0) == MVT::Flag) + return true; // Never CSE anything that produces a flag. + + switch (N->getOpcode()) { + default: break; + case ISD::HANDLENODE: + case ISD::DBG_LABEL: + case ISD::DBG_STOPPOINT: + case ISD::EH_LABEL: + case ISD::DECLARE: + return true; // Never CSE these nodes. + } + + // Check that remaining values produced are not flags. + for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) + if (N->getValueType(i) == MVT::Flag) + return true; // Never CSE anything that produces a flag. + + return false; +} + +/// RemoveDeadNodes - This method deletes all unreachable nodes in the +/// SelectionDAG. +void SelectionDAG::RemoveDeadNodes() { + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted. + HandleSDNode Dummy(getRoot()); + + SmallVector<SDNode*, 128> DeadNodes; + + // Add all obviously-dead nodes to the DeadNodes worklist. + for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) + if (I->use_empty()) + DeadNodes.push_back(I); + + RemoveDeadNodes(DeadNodes); + + // If the root changed (e.g. it was a dead load, update the root). + setRoot(Dummy.getValue()); +} + +/// RemoveDeadNodes - This method deletes the unreachable nodes in the +/// given list, and any nodes that become unreachable as a result. +void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes, + DAGUpdateListener *UpdateListener) { + + // Process the worklist, deleting the nodes and adding their uses to the + // worklist. + while (!DeadNodes.empty()) { + SDNode *N = DeadNodes.pop_back_val(); + + if (UpdateListener) + UpdateListener->NodeDeleted(N, 0); + + // Take the node out of the appropriate CSE map. + RemoveNodeFromCSEMaps(N); + + // Next, brutally remove the operand list. This is safe to do, as there are + // no cycles in the graph. + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Operand = Use.getNode(); + Use.set(SDValue()); + + // Now that we removed this operand, see if there are no uses of it left. + if (Operand->use_empty()) + DeadNodes.push_back(Operand); + } + + DeallocateNode(N); + } +} + +void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ + SmallVector<SDNode*, 16> DeadNodes(1, N); + RemoveDeadNodes(DeadNodes, UpdateListener); +} + +void SelectionDAG::DeleteNode(SDNode *N) { + // First take this out of the appropriate CSE map. 
+ RemoveNodeFromCSEMaps(N); + + // Finally, remove uses due to operands of this node, remove from the + // AllNodes list, and delete the node. + DeleteNodeNotInCSEMaps(N); +} + +void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { + assert(N != AllNodes.begin() && "Cannot delete the entry node!"); + assert(N->use_empty() && "Cannot delete a node that is not dead!"); + + // Drop all of the operands and decrement used node's use counts. + N->DropOperands(); + + DeallocateNode(N); +} + +void SelectionDAG::DeallocateNode(SDNode *N) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + + // Set the opcode to DELETED_NODE to help catch bugs when node + // memory is reallocated. + N->NodeType = ISD::DELETED_NODE; + + NodeAllocator.Deallocate(AllNodes.remove(N)); +} + +/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that +/// correspond to it. This is useful when we're about to delete or repurpose +/// the node. We don't want future request for structurally identical nodes +/// to return N anymore. +bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { + bool Erased = false; + switch (N->getOpcode()) { + case ISD::EntryToken: + assert(0 && "EntryToken should not be in CSEMaps!"); + return false; + case ISD::HANDLENODE: return false; // noop. + case ISD::CONDCODE: + assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && + "Cond code doesn't exist!"); + Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0; + CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0; + break; + case ISD::ExternalSymbol: + Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); + break; + case ISD::TargetExternalSymbol: + Erased = + TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); + break; + case ISD::VALUETYPE: { + MVT VT = cast<VTSDNode>(N)->getVT(); + if (VT.isExtended()) { + Erased = ExtendedValueTypeNodes.erase(VT); + } else { + Erased = ValueTypeNodes[VT.getSimpleVT()] != 0; + ValueTypeNodes[VT.getSimpleVT()] = 0; + } + break; + } + default: + // Remove it from the CSE Map. + Erased = CSEMap.RemoveNode(N); + break; + } +#ifndef NDEBUG + // Verify that the node was actually in one of the CSE maps, unless it has a + // flag result (which cannot be CSE'd) or is one of the special cases that are + // not subject to CSE. + if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && + !N->isMachineOpcode() && !doNotCSE(N)) { + N->dump(this); + cerr << "\n"; + assert(0 && "Node is not in map!"); + } +#endif + return Erased; +} + +/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE +/// maps and modified in place. Add it back to the CSE maps, unless an identical +/// node already exists, in which case transfer all its users to the existing +/// node. This transfer can potentially trigger recursive merging. +/// +void +SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N, + DAGUpdateListener *UpdateListener) { + // For node types that aren't CSE'd, just act as if no identical node + // already exists. + if (!doNotCSE(N)) { + SDNode *Existing = CSEMap.GetOrInsertNode(N); + if (Existing != N) { + // If there was already an existing matching node, use ReplaceAllUsesWith + // to replace the dead one with the existing one. This can cause + // recursive merging of other unrelated nodes down the line. + ReplaceAllUsesWith(N, Existing, UpdateListener); + + // N is now dead. Inform the listener if it exists and delete it. 
+ if (UpdateListener) + UpdateListener->NodeDeleted(N, Existing); + DeleteNodeNotInCSEMaps(N); + return; + } + } + + // If the node doesn't already exist, we updated it. Inform a listener if + // it exists. + if (UpdateListener) + UpdateListener->NodeUpdated(N); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + SDValue Op1, SDValue Op2, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op1, Op2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + const SDValue *Ops,unsigned NumOps, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + +/// VerifyNode - Sanity check the given node. Aborts if it is invalid. 
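+/// Currently only BUILD_PAIR and BUILD_VECTOR nodes carry checks; all other
+/// opcodes are accepted as-is.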
+void SelectionDAG::VerifyNode(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + MVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + MVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + break; + } + } +} + +/// getMVTAlignment - Compute the default alignment value for the +/// given type. +/// +unsigned SelectionDAG::getMVTAlignment(MVT VT) const { + const Type *Ty = VT == MVT::iPTR ? + PointerType::get(Type::Int8Ty, 0) : + VT.getTypeForMVT(); + + return TLI.getTargetData()->getABITypeAlignment(Ty); +} + +// EntryNode could meaningfully have debug info if we can find it... +SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) + : TLI(tli), FLI(fli), DW(0), + EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), + getVTList(MVT::Other)), Root(getEntryNode()) { + AllNodes.push_back(&EntryNode); +} + +void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, + DwarfWriter *dw) { + MF = &mf; + MMI = mmi; + DW = dw; +} + +SelectionDAG::~SelectionDAG() { + allnodes_clear(); +} + +void SelectionDAG::allnodes_clear() { + assert(&*AllNodes.begin() == &EntryNode); + AllNodes.remove(AllNodes.begin()); + while (!AllNodes.empty()) + DeallocateNode(AllNodes.begin()); +} + +void SelectionDAG::clear() { + allnodes_clear(); + OperandAllocator.Reset(); + CSEMap.clear(); + + ExtendedValueTypeNodes.clear(); + ExternalSymbols.clear(); + TargetExternalSymbols.clear(); + std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), + static_cast<CondCodeSDNode*>(0)); + std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), + static_cast<SDNode*>(0)); + + EntryNode.UseList = 0; + AllNodes.push_back(&EntryNode); + Root = getEntryNode(); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) { + if (Op.getValueType() == VT) return Op; + APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(), + VT.getSizeInBits()); + return getNode(ISD::AND, DL, Op.getValueType(), Op, + getConstant(Imm, Op.getValueType())); +} + +/// getNOT - Create a bitwise NOT operation as (XOR Val, -1). +/// +SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, MVT VT) { + MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + SDValue NegOne = + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + return getNode(ISD::XOR, DL, VT, Val, NegOne); +} + +SDValue SelectionDAG::getConstant(uint64_t Val, MVT VT, bool isT) { + MVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; + assert((EltVT.getSizeInBits() >= 64 || + (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && + "getConstant with a uint64_t value that doesn't fit in the type!"); + return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const APInt &Val, MVT VT, bool isT) { + return getConstant(*ConstantInt::get(Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const ConstantInt &Val, MVT VT, bool isT) { + assert(VT.isInteger() && "Cannot create FP integer constant!"); + + MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + assert(Val.getBitWidth() == EltVT.getSizeInBits() && + "APInt size does not match type size!"); + + unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&Val); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + if (!N) { + N = NodeAllocator.Allocate<ConstantSDNode>(); + new (N) ConstantSDNode(isT, &Val, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(), + VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { + return getConstant(Val, TLI.getPointerTy(), isTarget); +} + + +SDValue SelectionDAG::getConstantFP(const APFloat& V, MVT VT, bool isTarget) { + return getConstantFP(*ConstantFP::get(V), VT, isTarget); +} + +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){ + assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); + + MVT EltVT = + VT.isVector() ? VT.getVectorElementType() : VT; + + // Do the map lookup using the actual bit pattern for the floating point + // value, so that we don't have problems with 0.0 comparing equal to -0.0, and + // we don't have issues with SNANs. + unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&V); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + if (!N) { + N = NodeAllocator.Allocate<ConstantFPSDNode>(); + new (N) ConstantFPSDNode(isTarget, &V, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + // FIXME DebugLoc info might be appropriate here + Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(), + VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) { + MVT EltVT = + VT.isVector() ? VT.getVectorElementType() : VT; + if (EltVT==MVT::f32) + return getConstantFP(APFloat((float)Val), VT, isTarget); + else + return getConstantFP(APFloat(Val), VT, isTarget); +} + +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, + MVT VT, int64_t Offset, + bool isTargetGA) { + unsigned Opc; + + // Truncate (with sign-extension) the offset value to the pointer size. 
+ unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + if (BitWidth < 64) + Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); + + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + if (!GVar) { + // If GV is an alias then use the aliasee for determining thread-localness. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); + } + + if (GVar && GVar->isThreadLocal()) + Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; + else + Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddPointer(GV); + ID.AddInteger(Offset); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>(); + new (N) GlobalAddressSDNode(isTargetGA, GV, VT, Offset); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) { + unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(FI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>(); + new (N) FrameIndexSDNode(FI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget){ + unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(JTI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>(); + new (N) JumpTableSDNode(JTI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT, + unsigned Alignment, int Offset, + bool isTarget) { + if (Alignment == 0) + Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + ID.AddPointer(C); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); + new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT, + unsigned Alignment, int Offset, + bool isTarget) { + if (Alignment == 0) + Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + C->AddSelectionDAGCSEId(ID); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); + new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MBB); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>(); + new (N) BasicBlockSDNode(MBB); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getArgFlags(ISD::ArgFlagsTy Flags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::ARG_FLAGS, getVTList(MVT::Other), 0, 0); + ID.AddInteger(Flags.getRawBits()); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<ARG_FLAGSSDNode>(); + new (N) ARG_FLAGSSDNode(Flags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getValueType(MVT VT) { + if (VT.isSimple() && (unsigned)VT.getSimpleVT() >= ValueTypeNodes.size()) + ValueTypeNodes.resize(VT.getSimpleVT()+1); + + SDNode *&N = VT.isExtended() ? + ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT()]; + + if (N) return SDValue(N, 0); + N = NodeAllocator.Allocate<VTSDNode>(); + new (N) VTSDNode(VT); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) { + SDNode *&N = ExternalSymbols[Sym]; + if (N) return SDValue(N, 0); + N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); + new (N) ExternalSymbolSDNode(false, Sym, VT); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT) { + SDNode *&N = TargetExternalSymbols[Sym]; + if (N) return SDValue(N, 0); + N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); + new (N) ExternalSymbolSDNode(true, Sym, VT); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { + if ((unsigned)Cond >= CondCodeNodes.size()) + CondCodeNodes.resize(Cond+1); + + if (CondCodeNodes[Cond] == 0) { + CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>(); + new (N) CondCodeSDNode(Cond); + CondCodeNodes[Cond] = N; + AllNodes.push_back(N); + } + return SDValue(CondCodeNodes[Cond], 0); +} + +// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in +// the shuffle mask M that point at N1 to point at N2, and indices that point +// N2 to point at N1. 
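+// For example, with 4-element inputs the mask <0,5,2,7> becomes <4,1,6,3>
+// once the operands are swapped.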
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { + std::swap(N1, N2); + int NElts = M.size(); + for (int i = 0; i != NElts; ++i) { + if (M[i] >= NElts) + M[i] -= NElts; + else if (M[i] >= 0) + M[i] += NElts; + } +} + +SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, + SDValue N2, const int *Mask) { + assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); + assert(VT.isVector() && N1.getValueType().isVector() && + "Vector Shuffle VTs must be a vectors"); + assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() + && "Vector Shuffle VTs must have same element type"); + + // Canonicalize shuffle undef, undef -> undef + if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) + return N1; + + // Validate that all indices in Mask are within the range of the elements + // input to the shuffle. + unsigned NElts = VT.getVectorNumElements(); + SmallVector<int, 8> MaskVec; + for (unsigned i = 0; i != NElts; ++i) { + assert(Mask[i] < (int)(NElts * 2) && "Index out of range"); + MaskVec.push_back(Mask[i]); + } + + // Canonicalize shuffle v, v -> v, undef + if (N1 == N2) { + N2 = getUNDEF(VT); + for (unsigned i = 0; i != NElts; ++i) + if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts; + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. + if (N1.getOpcode() == ISD::UNDEF) + commuteShuffle(N1, N2, MaskVec); + + // Canonicalize all index into lhs, -> shuffle lhs, undef + // Canonicalize all index into rhs, -> shuffle rhs, undef + bool AllLHS = true, AllRHS = true; + bool N2Undef = N2.getOpcode() == ISD::UNDEF; + for (unsigned i = 0; i != NElts; ++i) { + if (MaskVec[i] >= (int)NElts) { + if (N2Undef) + MaskVec[i] = -1; + else + AllLHS = false; + } else if (MaskVec[i] >= 0) { + AllRHS = false; + } + } + if (AllLHS && AllRHS) + return getUNDEF(VT); + if (AllLHS && !N2Undef) + N2 = getUNDEF(VT); + if (AllRHS) { + N1 = getUNDEF(VT); + commuteShuffle(N1, N2, MaskVec); + } + + // If Identity shuffle, or all shuffle in to undef, return that node. + bool AllUndef = true; + bool Identity = true; + for (unsigned i = 0; i != NElts; ++i) { + if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + if (MaskVec[i] >= 0) AllUndef = false; + } + if (Identity) + return N1; + if (AllUndef) + return getUNDEF(VT); + + FoldingSetNodeID ID; + SDValue Ops[2] = { N1, N2 }; + AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); + for (unsigned i = 0; i != NElts; ++i) + ID.AddInteger(MaskVec[i]); + + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + // Allocate the mask array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the NodeAllocator is released. + int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); + memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); + + ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>(); + new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, + SDValue Val, SDValue DTy, + SDValue STy, SDValue Rnd, SDValue Sat, + ISD::CvtCode Code) { + // If the src and dest types are the same and the conversion is between + // integer types of the same sign or two floats, no conversion is necessary. 
+ if (DTy == STy && + (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF)) + return Val; + + FoldingSetNodeID ID; + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>(); + SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; + new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); + ID.AddInteger(RegNo); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<RegisterSDNode>(); + new (N) RegisterSDNode(RegNo, VT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root, + unsigned Line, unsigned Col, + Value *CU) { + SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>(); + new (N) DbgStopPointSDNode(Root, Line, Col, CU); + N->setDebugLoc(DL); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl, + SDValue Root, + unsigned LabelID) { + FoldingSetNodeID ID; + SDValue Ops[] = { Root }; + AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1); + ID.AddInteger(LabelID); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<LabelSDNode>(); + new (N) LabelSDNode(Opcode, dl, Root, LabelID); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getSrcValue(const Value *V) { + assert((!V || isa<PointerType>(V->getType())) && + "SrcValue is not a pointer?"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(V); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>(); + new (N) SrcValueSDNode(V); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMemOperand(const MachineMemOperand &MO) { +#ifndef NDEBUG + const Value *v = MO.getValue(); + assert((!v || isa<PointerType>(v->getType())) && + "SrcValue is not a pointer?"); +#endif + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MEMOPERAND, getVTList(MVT::Other), 0, 0); + MO.Profile(ID); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = NodeAllocator.Allocate<MemOperandSDNode>(); + new (N) MemOperandSDNode(MO); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// getShiftAmountOperand - Return the specified value casted to +/// the target's desired shift amount type. +SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { + MVT OpTy = Op.getValueType(); + MVT ShTy = TLI.getShiftAmountTy(); + if (OpTy == ShTy || OpTy.isVector()) return Op; + + ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + return getNode(Opcode, Op.getDebugLoc(), ShTy, Op); +} + +/// CreateStackTemporary - Create a stack temporary, suitable for holding the +/// specified value type. 
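+/// The slot is sized from the type's store size and aligned to the larger of
+/// minAlign and the type's preferred alignment.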
+SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) { + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + unsigned ByteSize = VT.getStoreSizeInBits()/8; + const Type *Ty = VT.getTypeForMVT(); + unsigned StackAlign = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); + + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +/// CreateStackTemporary - Create a stack temporary suitable for holding +/// either of the specified value types. +SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) { + unsigned Bytes = std::max(VT1.getStoreSizeInBits(), + VT2.getStoreSizeInBits())/8; + const Type *Ty1 = VT1.getTypeForMVT(); + const Type *Ty2 = VT2.getTypeForMVT(); + const TargetData *TD = TLI.getTargetData(); + unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), + TD->getPrefTypeAlignment(Ty2)); + + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, + SDValue N2, ISD::CondCode Cond, DebugLoc dl) { + // These setcc operations always fold. + switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return getConstant(1, VT); + + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETO: + case ISD::SETUO: + case ISD::SETUEQ: + case ISD::SETUNE: + assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!"); + break; + } + + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) { + const APInt &C2 = N2C->getAPIntValue(); + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + + switch (Cond) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETEQ: return getConstant(C1 == C2, VT); + case ISD::SETNE: return getConstant(C1 != C2, VT); + case ISD::SETULT: return getConstant(C1.ult(C2), VT); + case ISD::SETUGT: return getConstant(C1.ugt(C2), VT); + case ISD::SETULE: return getConstant(C1.ule(C2), VT); + case ISD::SETUGE: return getConstant(C1.uge(C2), VT); + case ISD::SETLT: return getConstant(C1.slt(C2), VT); + case ISD::SETGT: return getConstant(C1.sgt(C2), VT); + case ISD::SETLE: return getConstant(C1.sle(C2), VT); + case ISD::SETGE: return getConstant(C1.sge(C2), VT); + } + } + } + if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) { + // No compile time operations on this type yet. 
+ if (N1C->getValueType(0) == MVT::ppcf128) + return SDValue(); + + APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); + switch (Cond) { + default: break; + case ISD::SETEQ: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT); + case ISD::SETNE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, VT); + case ISD::SETLT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT); + case ISD::SETGT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT); + case ISD::SETLE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, VT); + case ISD::SETGE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, VT); + case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT); + case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT); + case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, VT); + case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT); + case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, VT); + case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, VT); + case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT); + case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT); + } + } else { + // Ensure that the constant occurs on the RHS. + return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + } + } + + // Could not fold it. + return SDValue(); +} + +/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We +/// use this predicate to simplify operations downstream. +bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + unsigned BitWidth = Op.getValueSizeInBits(); + return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, + unsigned Depth) const { + APInt KnownZero, KnownOne; + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bitsets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) const { + unsigned BitWidth = Mask.getBitWidth(); + assert(BitWidth == Op.getValueType().getSizeInBits() && + "Mask size mismatches value type size!"); + + KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + if (Depth == 6 || Mask == 0) + return; // Limit search depth. 
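+ // Recurse into the operands of the opcodes handled below, combining the
+ // per-operand known bits according to each operation's semantics.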
+ + APInt KnownZero2, KnownOne2; + + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask; + KnownZero = ~KnownOne & Mask; + return; + case ISD::AND: + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero, + KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + return; + case ISD::OR: + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne, + KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + case ISD::XOR: { + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownZero = KnownZeroOut; + return; + } + case ISD::MUL: { + APInt Mask2 = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + KnownOne.clear(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero &= Mask; + return; + } + case ISD::UDIV: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the power of 2 known to + // be less than the denominator. 
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clear(); + KnownZero2.clear(); + ComputeMaskedBits(Op.getOperand(1), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } + case ISD::SELECT: + ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case ISD::SELECT_CC: + ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + return; + // The boolean result conforms to getBooleanContents. Fall through. + case ISD::SETCC: + // If we know the result of a setcc has the top bits zero, use this info. + if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + case ISD::SHL: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt), + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= ShAmt; + KnownOne <<= ShAmt; + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt); + } + return; + case ISD::SRL: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt), + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + KnownZero |= HighBits; // High bits known zero. + } + return; + case ISD::SRA: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. 
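The SHL and SRL cases above shift the known-bit sets by the constant amount and then mark the vacated positions as known zero (SRA, continued below, additionally propagates a known sign bit into the vacated high positions). The SHL/SRL rules on uint32_t, KnownBits32 as in the earlier sketch, assuming 0 < Amt < 32:

    KnownBits32 knownShl(KnownBits32 K, unsigned Amt) {
      // Vacated low bits are zero.
      return { (K.Zero << Amt) | ((1u << Amt) - 1), K.One << Amt };
    }
    KnownBits32 knownLshr(KnownBits32 K, unsigned Amt) {
      // Vacated high bits are zero.
      return { (K.Zero >> Amt) | ~(~0u >> Amt), K.One >> Amt };
    }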
+ if (ShAmt >= BitWidth) + return; + + APInt InDemandedMask = (Mask << ShAmt); + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + if (HighBits.getBoolValue()) + InDemandedMask |= APInt::getSignBit(BitWidth); + + ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bits. + APInt SignBit = APInt::getSignBit(BitWidth); + SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask. + + if (KnownZero.intersects(SignBit)) { + KnownZero |= HighBits; // New bits are known zero. + } else if (KnownOne.intersects(SignBit)) { + KnownOne |= HighBits; // New bits are known one. + } + } + return; + case ISD::SIGN_EXTEND_INREG: { + MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned EBits = EVT.getSizeInBits(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask; + + APInt InSignBit = APInt::getSignBit(EBits); + APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits); + + // If the sign extended bits are demanded, we know that the sign + // bit is demanded. + InSignBit.zext(BitWidth); + if (NewBits.getBoolValue()) + InputDemandedBits |= InSignBit; + + ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + return; + } + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownOne.clear(); + return; + } + case ISD::LOAD: { + if (ISD::isZEXTLoad(Op.getNode())) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + MVT VT = LD->getMemoryVT(); + unsigned MemBits = VT.getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; + } + return; + } + case ISD::ZERO_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; + APInt InMask = Mask; + InMask.trunc(InBits); + KnownZero.trunc(InBits); + KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + KnownZero |= NewBits; + return; + } + case ISD::SIGN_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt InSignBit = APInt::getSignBit(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; + APInt InMask = Mask; + InMask.trunc(InBits); + + // If any of the sign extended bits are demanded, we know that the sign + // bit is demanded. Temporarily set this bit in the mask for our callee. 
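Two of the cases above give particularly cheap facts: a zero-extending load and a ZERO_EXTEND leave every bit above the source width known zero, and the bit-counting nodes (CTTZ/CTLZ/CTPOP) can never exceed the bit width, so only their low Log2_32(BitWidth)+1 bits can ever be set. A small plain-C++ check of those two masks (the widths are example values, not anything the code fixes):

    #include <cassert>
    #include <cstdint>

    void checkCheapKnownZeroFacts() {
      // i8 zero-extended into i32: bits 8..31 are known zero.
      uint8_t Byte = 0xC3;
      assert((static_cast<uint32_t>(Byte) & (~0u << 8)) == 0);

      // CTPOP of an i32 is at most 32, which fits in 6 bits, so bits 6..31
      // of the result are known zero.
      assert((32u & (~0u << 6)) == 0);
    }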
+ if (NewBits.getBoolValue()) + InMask |= InSignBit; + + KnownZero.trunc(InBits); + KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + + // Note if the sign bit is known to be zero or one. + bool SignBitKnownZero = KnownZero.isNegative(); + bool SignBitKnownOne = KnownOne.isNegative(); + assert(!(SignBitKnownZero && SignBitKnownOne) && + "Sign bit can't be known to be both zero and one!"); + + // If the sign bit wasn't actually demanded by our caller, we don't + // want it set in the KnownZero and KnownOne result values. Reset the + // mask and reapply it to the result values. + InMask = Mask; + InMask.trunc(InBits); + KnownZero &= InMask; + KnownOne &= InMask; + + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + + // If the sign bit is known zero or one, the top bits match. + if (SignBitKnownZero) + KnownZero |= NewBits; + else if (SignBitKnownOne) + KnownOne |= NewBits; + return; + } + case ISD::ANY_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt InMask = Mask; + InMask.trunc(InBits); + KnownZero.trunc(InBits); + KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + return; + } + case ISD::TRUNCATE: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt InMask = Mask; + InMask.zext(InBits); + KnownZero.zext(InBits); + KnownOne.zext(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.trunc(BitWidth); + KnownOne.trunc(BitWidth); + break; + } + case ISD::AssertZext: { + MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); + ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, + KnownOne, Depth+1); + KnownZero |= (~InMask) & Mask; + return; + } + case ISD::FGETSIGN: + // All bits are zero except the low bit. + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + + case ISD::SUB: { + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (CLHS->getAPIntValue().isNonNegative()) { + unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, + Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + } + } + } + } + // fall through + case ISD::ADD: { + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear. 
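The SUB case above rests on a range argument: for C - X with C non-negative, if X is known small enough that no borrow can reach the high bits, the result keeps the leading zeros of C. A concrete instance of that reasoning, checked exhaustively for C = 20 and X < 16:

    #include <cassert>
    #include <cstdint>

    void checkSubLeadingZeros() {
      // 20 - X with 0 <= X < 16 stays in [5, 20], so bits 5..31 are zero,
      // i.e. the result keeps the 27 leading zero bits of the constant 20.
      for (uint32_t X = 0; X < 16; ++X)
        assert(((20u - X) & (~0u << 5)) == 0);
    }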
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes()); + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + + ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + KnownZeroOut = std::min(KnownZeroOut, + KnownZero2.countTrailingOnes()); + + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + return; + } + case ISD::SREM: + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue(); + if (RA.isPowerOf2() || (-RA).isPowerOf2()) { + APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + + // If the sign bit of the first operand is zero, the sign bit of + // the result is zero. If the first operand has no one bits below + // the second operand's single 1 bit, its sign will be zero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero2 |= ~LowBits; + + KnownZero |= KnownZero2 & Mask; + + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + } + } + return; + case ISD::UREM: { + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + APInt Mask2 = LowBits & Mask; + KnownZero |= ~LowBits & Mask; + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, + Depth+1); + ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, + Depth+1); + + uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clear(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + return; + } + default: + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this); + } + return; + } +} + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "SRA X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + MVT VT = Op.getValueType(); + assert(VT.isInteger() && "Invalid VT!"); + unsigned VTBits = VT.getSizeInBits(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + if (Depth == 6) + return 1; // Limit search depth. 
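The ADD rule above (reached by the fall-through from SUB) keeps as many low zero bits as both addends share; the comment's own example, 8 + (X << 3), checked directly in plain C++:

    #include <cassert>
    #include <cstdint>

    void checkAddTrailingZeros() {
      // Both 8 and (X << 3) have their low 3 bits clear, so the sum does too.
      for (uint32_t X = 0; X < 1024; ++X)
        assert(((8u + (X << 3)) & 0x7u) == 0);
    }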
+ + switch (Op.getOpcode()) { + default: break; + case ISD::AssertSext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::AssertZext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp; + + case ISD::Constant: { + const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue(); + // If negative, return # leading ones. + if (Val.isNegative()) + return Val.countLeadingOnes(); + + // Return # leading zeros. + return Val.countLeadingZeros(); + } + + case ISD::SIGN_EXTEND: + Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits(); + return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; + + case ISD::SIGN_EXTEND_INREG: + // Max of the input and what this extends. + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + Tmp = VTBits-Tmp+1; + + Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); + return std::max(Tmp, Tmp2); + + case ISD::SRA: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + // SRA X, C -> adds C sign bits. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + Tmp += C->getZExtValue(); + if (Tmp > VTBits) Tmp = VTBits; + } + return Tmp; + case ISD::SHL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (C->getZExtValue() >= VTBits || // Bad shift. + C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + return Tmp - C->getZExtValue(); + } + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // ComputeMaskedBits, and pick whichever answer is better. + } + break; + + case ISD::SELECT: + Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); + return std::min(Tmp, Tmp2); + + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + break; + // The boolean result conforms to getBooleanContents. Fall through. + case ISD::SETCC: + // If setcc returns 0/-1, all bits are sign bits. + if (TLI.getBooleanContents() == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RotAmt = C->getZExtValue() & (VTBits-1); + + // Handle rotate right by N like a rotate left by 32-N. + if (Op.getOpcode() == ISD::ROTR) + RotAmt = (VTBits-RotAmt) & (VTBits-1); + + // If we aren't rotating out all of the known-in sign bits, return the + // number that are left. This handles rotl(sext(x), 1) for example. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp > RotAmt+1) return Tmp-RotAmt; + } + break; + case ISD::ADD: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. 
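ComputeNumSignBits counts how many copies of the sign bit sit at the top of the value. A standalone model of that count for int32_t, with a few values matching the rules above (a non-negative constant yields its leading-zero count; an arithmetic shift right by a constant adds that many sign bits):

    #include <cassert>
    #include <cstdint>

    unsigned signBits32(int32_t V) {
      uint32_t U = static_cast<uint32_t>(V);
      unsigned Sign = U >> 31, N = 1;
      while (N < 32 && ((U >> (31 - N)) & 1) == Sign)
        ++N;
      return N;
    }

    void checkSignBits() {
      assert(signBits32(-1) == 32);              // all-ones constant
      assert(signBits32(0x12345678) == 3);       // leading zeros of a constant
      assert(signBits32(0x12345678 >> 2) == 5);  // SRA X, 2 adds two sign bits
    }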
+ + // Special case decrementing a value (ADD X, -1): + if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + if (CRHS->isAllOnesValue()) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)) == Mask) + return VTBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero.isNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + break; + + case ISD::SUB: + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (CLHS->isNullValue()) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)) == Mask) + return VTBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero.isNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + break; + case ISD::TRUNCATE: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + } + + // Handle LOADX separately here. EXTLOAD case will fallthrough. + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = LD->getMemoryVT().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = LD->getMemoryVT().getSizeInBits(); + return VTBits-Tmp; + } + } + + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) { + unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + + if (KnownZero.isNegative()) { // sign bit is 0 + Mask = KnownZero; + } else if (KnownOne.isNegative()) { // sign bit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return FirstAnswer; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask = ~Mask; + Mask <<= Mask.getBitWidth()-VTBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". 
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); +} + + +bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { + GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); + if (!GA) return false; + if (GA->getOffset() != 0) return false; + GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()); + if (!GV) return false; + MachineModuleInfo *MMI = getMachineModuleInfo(); + return MMI && MMI->hasDebugInfo(); +} + + +/// getShuffleScalarElt - Returns the scalar element that will make up the ith +/// element of the result of the vector shuffle. +SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, + unsigned i) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + if (N->getMaskElt(i) < 0) + return getUNDEF(VT.getVectorElementType()); + unsigned Index = N->getMaskElt(i); + unsigned NumElems = VT.getVectorNumElements(); + SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1); + Index %= NumElems; + + if (V.getOpcode() == ISD::BIT_CONVERT) { + V = V.getOperand(0); + MVT VVT = V.getValueType(); + if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems) + return SDValue(); + } + if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) + return (Index == 0) ? V.getOperand(0) + : getUNDEF(VT.getVectorElementType()); + if (V.getOpcode() == ISD::BUILD_VECTOR) + return V.getOperand(Index); + if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V)) + return getShuffleScalarElt(SVN, Index); + return SDValue(); +} + + +/// getNode - Gets or creates the specified node. +/// +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, getVTList(VT)); + CSEMap.InsertNode(N, IP); + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, + MVT VT, SDValue Operand) { + // Constant fold unary operations with an integer constant operand. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { + const APInt &Val = C->getAPIntValue(); + unsigned BitWidth = VT.getSizeInBits(); + switch (Opcode) { + default: break; + case ISD::SIGN_EXTEND: + return getConstant(APInt(Val).sextOrTrunc(BitWidth), VT); + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::TRUNCATE: + return getConstant(APInt(Val).zextOrTrunc(BitWidth), VT); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: { + const uint64_t zero[] = {0, 0}; + // No compile time operations on this type. 
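The constant-folding block above means that building, say, a SIGN_EXTEND of a ConstantSDNode never creates an extend node at all; the widened constant is returned directly. What the sextOrTrunc/zextOrTrunc folds compute, shown on plain integers for an i8 source (assumes the usual two's-complement narrowing for the int8_t cast):

    #include <cassert>
    #include <cstdint>

    void checkExtendConstantFolds() {
      uint8_t C = 0xF0;
      // SIGN_EXTEND i8 0xF0 -> i32 0xFFFFFFF0
      assert(static_cast<uint32_t>(static_cast<int32_t>(static_cast<int8_t>(C)))
             == 0xFFFFFFF0u);
      // ZERO_EXTEND / ANY_EXTEND i8 0xF0 -> i32 0x000000F0
      assert(static_cast<uint32_t>(C) == 0xF0u);
    }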
+ if (VT==MVT::ppcf128) + break; + APFloat apf = APFloat(APInt(BitWidth, 2, zero)); + (void)apf.convertFromAPInt(Val, + Opcode==ISD::SINT_TO_FP, + APFloat::rmNearestTiesToEven); + return getConstantFP(apf, VT); + } + case ISD::BIT_CONVERT: + if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) + return getConstantFP(Val.bitsToFloat(), VT); + else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + return getConstantFP(Val.bitsToDouble(), VT); + break; + case ISD::BSWAP: + return getConstant(Val.byteSwap(), VT); + case ISD::CTPOP: + return getConstant(Val.countPopulation(), VT); + case ISD::CTLZ: + return getConstant(Val.countLeadingZeros(), VT); + case ISD::CTTZ: + return getConstant(Val.countTrailingZeros(), VT); + } + } + + // Constant fold unary operations with a floating point constant operand. + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) { + APFloat V = C->getValueAPF(); // make copy + if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) { + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, VT); + case ISD::FP_ROUND: + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*MVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + integerPart x[2]; + bool ignored; + assert(integerPartWidth >= 64); + // FIXME need to be more flexible about rounding mode. + APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), + Opcode==ISD::FP_TO_SINT, + APFloat::rmTowardZero, &ignored); + if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual + break; + APInt api(VT.getSizeInBits(), 2, x); + return getConstant(api, VT); + } + case ISD::BIT_CONVERT: + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; + } + } + } + + unsigned OpOpcode = Operand.getNode()->getOpcode(); + switch (Opcode) { + case ISD::TokenFactor: + case ISD::MERGE_VALUES: + case ISD::CONCAT_VECTORS: + return Operand; // Factor, merge or concat of one node? No need. + case ISD::FP_ROUND: assert(0 && "Invalid method to make FP_ROUND node"); + case ISD::FP_EXTEND: + assert(VT.isFloatingPoint() && + Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); + if (Operand.getValueType() == VT) return Operand; // noop conversion. 
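The two BIT_CONVERT folds above simply reinterpret the constant's bits in the other type. The same reinterpretation expressed in portable C++ (memcpy is the standard way to write a bit cast here):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    void checkBitConvertFold() {
      // i32 0x3F800000 and f32 1.0 are the same 32 bits.
      uint32_t Bits = 0x3F800000u;
      float F;
      std::memcpy(&F, &Bits, sizeof F);
      assert(F == 1.0f);
    }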
+ if (Operand.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::SIGN_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid SIGN_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().bitsLT(VT) + && "Invalid sext node, dst < src!"); + if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + break; + case ISD::ZERO_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ZERO_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().bitsLT(VT) + && "Invalid zext node, dst < src!"); + if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) + return getNode(ISD::ZERO_EXTEND, DL, VT, + Operand.getNode()->getOperand(0)); + break; + case ISD::ANY_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ANY_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().bitsLT(VT) + && "Invalid anyext node, dst < src!"); + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) + // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + break; + case ISD::TRUNCATE: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid TRUNCATE!"); + if (Operand.getValueType() == VT) return Operand; // noop truncate + assert(Operand.getValueType().bitsGT(VT) + && "Invalid truncate node, src < dst!"); + if (OpOpcode == ISD::TRUNCATE) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { + // If the source is smaller than the dest, we still need an extend. + if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT)) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); + else + return Operand.getNode()->getOperand(0); + } + break; + case ISD::BIT_CONVERT: + // Basic sanity checking. + assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() + && "Cannot BIT_CONVERT between types of different sizes!"); + if (VT == Operand.getValueType()) return Operand; // noop conversion. + if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x) + return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0)); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::SCALAR_TO_VECTOR: + assert(VT.isVector() && !Operand.getValueType().isVector() && + (VT.getVectorElementType() == Operand.getValueType() || + (VT.getVectorElementType().isInteger() && + Operand.getValueType().isInteger() && + VT.getVectorElementType().bitsLE(Operand.getValueType()))) && + "Illegal SCALAR_TO_VECTOR node!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined. 
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Operand.getOperand(1)) && + Operand.getConstantOperandVal(1) == 0 && + Operand.getOperand(0).getValueType() == VT) + return Operand.getOperand(0); + break; + case ISD::FNEG: + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 + if (UnsafeFPMath && OpOpcode == ISD::FSUB) + return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), + Operand.getNode()->getOperand(0)); + if (OpOpcode == ISD::FNEG) // --X -> X + return Operand.getNode()->getOperand(0); + break; + case ISD::FABS: + if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) + return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0)); + break; + } + + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { // Don't CSE flag producing nodes + FoldingSetNodeID ID; + SDValue Ops[1] = { Operand }; + AddNodeIDNode(ID, Opcode, VTs, Ops, 1); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTs, Operand); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTs, Operand); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, + MVT VT, + ConstantSDNode *Cst1, + ConstantSDNode *Cst2) { + const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); + + switch (Opcode) { + case ISD::ADD: return getConstant(C1 + C2, VT); + case ISD::SUB: return getConstant(C1 - C2, VT); + case ISD::MUL: return getConstant(C1 * C2, VT); + case ISD::UDIV: + if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT); + break; + case ISD::UREM: + if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT); + break; + case ISD::SDIV: + if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT); + break; + case ISD::SREM: + if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT); + break; + case ISD::AND: return getConstant(C1 & C2, VT); + case ISD::OR: return getConstant(C1 | C2, VT); + case ISD::XOR: return getConstant(C1 ^ C2, VT); + case ISD::SHL: return getConstant(C1 << C2, VT); + case ISD::SRL: return getConstant(C1.lshr(C2), VT); + case ISD::SRA: return getConstant(C1.ashr(C2), VT); + case ISD::ROTL: return getConstant(C1.rotl(C2), VT); + case ISD::ROTR: return getConstant(C1.rotr(C2), VT); + default: break; + } + + return SDValue(); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue N1, SDValue N2) { + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + switch (Opcode) { + default: break; + case ISD::TokenFactor: + assert(VT == MVT::Other && N1.getValueType() == MVT::Other && + N2.getValueType() == MVT::Other && "Invalid token factor!"); + // Fold trivial token factors. + if (N1.getOpcode() == ISD::EntryToken) return N2; + if (N2.getOpcode() == ISD::EntryToken) return N1; + if (N1 == N2) return N1; + break; + case ISD::CONCAT_VECTORS: + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to + // one big BUILD_VECTOR. 
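The memoization step above (FoldingSetNodeID plus CSEMap) is what keeps the graph a DAG: asking for a node with an opcode, value-type list, and operands that already exist hands back the existing node instead of allocating a new one. A toy model of that idea, with made-up types rather than the LLVM FoldingSet API:

    #include <map>
    #include <tuple>
    #include <vector>

    struct ToyNode { unsigned Opcode; int LHS, RHS; };

    struct ToyDAG {
      std::vector<ToyNode> Nodes;
      std::map<std::tuple<unsigned, int, int>, int> CSEMap; // key -> node index

      int getNode(unsigned Opcode, int LHS, int RHS) {
        auto Key = std::make_tuple(Opcode, LHS, RHS);
        auto It = CSEMap.find(Key);
        if (It != CSEMap.end())
          return It->second;              // reuse the identical existing node
        Nodes.push_back({Opcode, LHS, RHS});
        int Id = static_cast<int>(Nodes.size()) - 1;
        CSEMap.emplace(Key, Id);
        return Id;
      }
    };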
+ if (N1.getOpcode() == ISD::BUILD_VECTOR && + N2.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); + Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); + return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + } + break; + case ISD::AND: + assert(VT.isInteger() && N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's + // worth handling here. + if (N2C && N2C->isNullValue()) + return N2; + if (N2C && N2C->isAllOnesValue()) // X & -1 -> X + return N1; + break; + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + assert(VT.isInteger() && N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so + // it's worth handling here. + if (N2C && N2C->isNullValue()) + return N1; + break; + case ISD::UDIV: + case ISD::UREM: + case ISD::MULHU: + case ISD::MULHS: + case ISD::MUL: + case ISD::SDIV: + case ISD::SREM: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + // fall through + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (UnsafeFPMath) { + if (Opcode == ISD::FADD) { + // 0+x --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) + if (CFP->getValueAPF().isZero()) + return N2; + // x+0 --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) + if (CFP->getValueAPF().isZero()) + return N1; + } else if (Opcode == ISD::FSUB) { + // x-0 --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) + if (CFP->getValueAPF().isZero()) + return N1; + } + } + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; + case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match. + assert(N1.getValueType() == VT && + N1.getValueType().isFloatingPoint() && + N2.getValueType().isFloatingPoint() && + "Invalid FCOPYSIGN!"); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + assert(VT == N1.getValueType() && + "Shift operators return type must be the same as their first arg"); + assert(VT.isInteger() && N2.getValueType().isInteger() && + "Shifts only work on integers"); + + // Always fold shifts of i1 values so the code generator doesn't need to + // handle them. Since we know the size of the shift has to be less than the + // size of the value, the shift/rotate count is guaranteed to be zero. + if (VT == MVT::i1) + return N1; + break; + case ISD::FP_ROUND_INREG: { + MVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg round!"); + assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && + "Cannot FP_ROUND_INREG integer types"); + assert(EVT.bitsLE(VT) && "Not rounding down!"); + if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. + break; + } + case ISD::FP_ROUND: + assert(VT.isFloatingPoint() && + N1.getValueType().isFloatingPoint() && + VT.bitsLE(N1.getValueType()) && + isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!"); + if (N1.getValueType() == VT) return N1; // noop conversion. 
+ break; + case ISD::AssertSext: + case ISD::AssertZext: { + MVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (VT == EVT) return N1; // noop assertion. + break; + } + case ISD::SIGN_EXTEND_INREG: { + MVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (EVT == VT) return N1; // Not actually extending + + if (N1C) { + APInt Val = N1C->getAPIntValue(); + unsigned FromBits = cast<VTSDNode>(N2)->getVT().getSizeInBits(); + Val <<= Val.getBitWidth()-FromBits; + Val = Val.ashr(Val.getBitWidth()-FromBits); + return getConstant(Val, VT); + } + break; + } + case ISD::EXTRACT_VECTOR_ELT: + // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. + if (N1.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is + // expanding copies of large vectors from registers. + if (N2C && + N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0) { + unsigned Factor = + N1.getOperand(0).getValueType().getVectorNumElements(); + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + N1.getOperand(N2C->getZExtValue() / Factor), + getConstant(N2C->getZExtValue() % Factor, + N2.getValueType())); + } + + // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is + // expanding large vector constants. + if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { + SDValue Elt = N1.getOperand(N2C->getZExtValue()); + if (Elt.getValueType() != VT) { + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + assert(VT.isInteger() && Elt.getValueType().isInteger() && + VT.bitsLE(Elt.getValueType()) && + "Bad type for BUILD_VECTOR operand"); + Elt = getNode(ISD::TRUNCATE, DL, VT, Elt); + } + return Elt; + } + + // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector + // operations are lowered to scalars. + if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) { + // If the indices are the same, return the inserted element. + if (N1.getOperand(2) == N2) + return N1.getOperand(1); + // If the indices are known different, extract the element from + // the original vector. + else if (isa<ConstantSDNode>(N1.getOperand(2)) && + isa<ConstantSDNode>(N2)) + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); + } + break; + case ISD::EXTRACT_ELEMENT: + assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!"); + assert(!N1.getValueType().isVector() && !VT.isVector() && + (N1.getValueType().isInteger() == VT.isInteger()) && + "Wrong types for EXTRACT_ELEMENT!"); + + // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding + // 64-bit integers into 32-bit parts. Instead of building the extract of + // the BUILD_PAIR, only to have legalize rip it apart, just do it now. + if (N1.getOpcode() == ISD::BUILD_PAIR) + return N1.getOperand(N2C->getZExtValue()); + + // EXTRACT_ELEMENT of a constant int is also very common. 
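The SIGN_EXTEND_INREG constant fold above is the classic shift-up/shift-down trick: move the FromBits-wide field to the top of the word, then arithmetic-shift it back so its sign bit fills the upper bits. The same computation on int32_t (assumes the usual two's-complement, arithmetic right-shift behaviour that APInt::ashr guarantees):

    #include <cassert>
    #include <cstdint>

    int32_t signExtendInReg32(int32_t Val, unsigned FromBits) {
      unsigned Shift = 32 - FromBits;
      return static_cast<int32_t>(static_cast<uint32_t>(Val) << Shift) >> Shift;
    }

    void checkSignExtendInReg() {
      assert(signExtendInReg32(0x000000FF, 8) == -1);   // low byte 0xFF -> -1
      assert(signExtendInReg32(0x0000007F, 8) == 127);  // low byte 0x7F -> 127
    }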
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + unsigned ElementSize = VT.getSizeInBits(); + unsigned Shift = ElementSize * N2C->getZExtValue(); + APInt ShiftedVal = C->getAPIntValue().lshr(Shift); + return getConstant(ShiftedVal.trunc(ElementSize), VT); + } + break; + case ISD::EXTRACT_SUBVECTOR: + if (N1.getValueType() == VT) // Trivial extraction. + return N1; + break; + } + + if (N1C) { + if (N2C) { + SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C); + if (SV.getNode()) return SV; + } else { // Cannonicalize constant to RHS if commutative + if (isCommutativeBinOp(Opcode)) { + std::swap(N1C, N2C); + std::swap(N1, N2); + } + } + } + + // Constant fold FP operations. + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); + if (N1CFP) { + if (!N2CFP && isCommutativeBinOp(Opcode)) { + // Cannonicalize constant to RHS if commutative + std::swap(N1CFP, N2CFP); + std::swap(N1, N2); + } else if (N2CFP && VT != MVT::ppcf128) { + APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); + APFloat::opStatus s; + switch (Opcode) { + case ISD::FADD: + s = V1.add(V2, APFloat::rmNearestTiesToEven); + if (s != APFloat::opInvalidOp) + return getConstantFP(V1, VT); + break; + case ISD::FSUB: + s = V1.subtract(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp) + return getConstantFP(V1, VT); + break; + case ISD::FMUL: + s = V1.multiply(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp) + return getConstantFP(V1, VT); + break; + case ISD::FDIV: + s = V1.divide(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + return getConstantFP(V1, VT); + break; + case ISD::FREM : + s = V1.mod(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + return getConstantFP(V1, VT); + break; + case ISD::FCOPYSIGN: + V1.copySign(V2); + return getConstantFP(V1, VT); + default: break; + } + } + } + + // Canonicalize an UNDEF to the RHS, even over a constant. + if (N1.getOpcode() == ISD::UNDEF) { + if (isCommutativeBinOp(Opcode)) { + std::swap(N1, N2); + } else { + switch (Opcode) { + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: + case ISD::SUB: + case ISD::FSUB: + case ISD::FDIV: + case ISD::FREM: + case ISD::SRA: + return N1; // fold op(undef, arg2) -> undef + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::SRL: + case ISD::SHL: + if (!VT.isVector()) + return getConstant(0, VT); // fold op(undef, arg2) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. + return N2; + } + } + } + + // Fold a bunch of operators when the RHS is undef. + if (N2.getOpcode() == ISD::UNDEF) { + switch (Opcode) { + case ISD::XOR: + if (N1.getOpcode() == ISD::UNDEF) + // Handle undef ^ undef -> 0 special case. This is a common + // idiom (misuse). + return getConstant(0, VT); + // fallthrough + case ISD::ADD: + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUB: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + return N2; // fold op(arg1, undef) -> undef + case ISD::MUL: + case ISD::AND: + case ISD::SRL: + case ISD::SHL: + if (!VT.isVector()) + return getConstant(0, VT); // fold op(arg1, undef) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. 
+ return N1; + case ISD::OR: + if (!VT.isVector()) + return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + // For vectors, we can't easily build an all one vector, just return + // the LHS. + return N1; + case ISD::SRA: + return N1; + } + } + + // Memoize this node if possible. + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { + SDValue Ops[] = { N1, N2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue N1, SDValue N2, SDValue N3) { + // Perform various simplifications. + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + switch (Opcode) { + case ISD::CONCAT_VECTORS: + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to + // one big BUILD_VECTOR. + if (N1.getOpcode() == ISD::BUILD_VECTOR && + N2.getOpcode() == ISD::BUILD_VECTOR && + N3.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); + Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end()); + return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + } + break; + case ISD::SETCC: { + // Use FoldSetCC to simplify SETCC's. + SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL); + if (Simp.getNode()) return Simp; + break; + } + case ISD::SELECT: + if (N1C) { + if (N1C->getZExtValue()) + return N2; // select true, X, Y -> X + else + return N3; // select false, X, Y -> Y + } + + if (N2 == N3) return N2; // select C, X, X -> X + break; + case ISD::BRCOND: + if (N2C) { + if (N2C->getZExtValue()) // Unconditional branch + return getNode(ISD::BR, DL, MVT::Other, N1, N3); + else + return N1; // Never-taken branch + } + break; + case ISD::VECTOR_SHUFFLE: + assert(0 && "should use getVectorShuffle constructor!"); + break; + case ISD::BIT_CONVERT: + // Fold bit_convert nodes from a type to themselves. + if (N1.getValueType() == VT) + return N1; + break; + } + + // Memoize node if it doesn't produce a flag. 
+ SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { + SDValue Ops[] = { N1, N2, N3 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + N = NodeAllocator.Allocate<TernarySDNode>(); + new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<TernarySDNode>(); + new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + } + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VT, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VT, Ops, 5); +} + +/// getMemsetValue - Vectorized representation of the memset value +/// operand. +static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG, + DebugLoc dl) { + unsigned NumBits = VT.isVector() ? + VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits(); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { + APInt Val = APInt(NumBits, C->getZExtValue() & 255); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Val = (Val << Shift) | Val; + Shift <<= 1; + } + if (VT.isInteger()) + return DAG.getConstant(Val, VT); + return DAG.getConstantFP(APFloat(Val), VT); + } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Value = DAG.getNode(ISD::OR, dl, VT, + DAG.getNode(ISD::SHL, dl, VT, Value, + DAG.getConstant(Shift, + TLI.getShiftAmountTy())), + Value); + Shift <<= 1; + } + + return Value; +} + +/// getMemsetStringVal - Similar to getMemsetValue. Except this is only +/// used when a memcpy is turned into a memset when the source is a constant +/// string ptr. +static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, + const TargetLowering &TLI, + std::string &Str, unsigned Offset) { + // Handle vector with all elements zero. + if (Str.empty()) { + if (VT.isInteger()) + return DAG.getConstant(0, VT); + unsigned NumElts = VT.getVectorNumElements(); + MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + DAG.getConstant(0, MVT::getVectorVT(EltVT, NumElts))); + } + + assert(!VT.isVector() && "Can't handle vector type here!"); + unsigned NumBits = VT.getSizeInBits(); + unsigned MSB = NumBits / 8; + uint64_t Val = 0; + if (TLI.isLittleEndian()) + Offset = Offset + MSB - 1; + for (unsigned i = 0; i != MSB; ++i) { + Val = (Val << 8) | (unsigned char)Str[Offset]; + Offset += TLI.isLittleEndian() ? -1 : 1; + } + return DAG.getConstant(Val, VT); +} + +/// getMemBasePlusOffset - Returns base and offset node for the +/// +static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, + SelectionDAG &DAG) { + MVT VT = Base.getValueType(); + return DAG.getNode(ISD::ADD, Base.getDebugLoc(), + VT, Base, DAG.getConstant(Offset, VT)); +} + +/// isMemSrcFromString - Returns true if memcpy source is a string constant. 
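getMemsetValue above builds the store value for a memset by replicating the low byte of the value across the chosen type, doubling the replicated width on each iteration. The same doubling loop on uint32_t:

    #include <cassert>
    #include <cstdint>

    uint32_t splatMemsetByte32(uint8_t Byte) {
      uint32_t Value = Byte;
      unsigned Shift = 8;
      for (unsigned i = 32; i > 8; i >>= 1) {   // 0xAB -> 0xABAB -> 0xABABABAB
        Value = (Value << Shift) | Value;
        Shift <<= 1;
      }
      return Value;
    }

    void checkSplat() { assert(splatMemsetByte32(0xAB) == 0xABABABABu); }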
+/// +static bool isMemSrcFromString(SDValue Src, std::string &Str) { + unsigned SrcDelta = 0; + GlobalAddressSDNode *G = NULL; + if (Src.getOpcode() == ISD::GlobalAddress) + G = cast<GlobalAddressSDNode>(Src); + else if (Src.getOpcode() == ISD::ADD && + Src.getOperand(0).getOpcode() == ISD::GlobalAddress && + Src.getOperand(1).getOpcode() == ISD::Constant) { + G = cast<GlobalAddressSDNode>(Src.getOperand(0)); + SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue(); + } + if (!G) + return false; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); + if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false)) + return true; + + return false; +} + +/// MeetsMaxMemopRequirement - Determines if the number of memory ops required +/// to replace the memset / memcpy is below the threshold. It also returns the +/// types of the sequence of memory ops to perform memset / memcpy. +static +bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, + SDValue Dst, SDValue Src, + unsigned Limit, uint64_t Size, unsigned &Align, + std::string &Str, bool &isSrcStr, + SelectionDAG &DAG, + const TargetLowering &TLI) { + isSrcStr = isMemSrcFromString(Src, Str); + bool isSrcConst = isa<ConstantSDNode>(Src); + bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(); + MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr); + if (VT != MVT::iAny) { + unsigned NewAlign = (unsigned) + TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT()); + // If source is a string constant, this will require an unaligned load. + if (NewAlign > Align && (isSrcConst || AllowUnalign)) { + if (Dst.getOpcode() != ISD::FrameIndex) { + // Can't change destination alignment. It requires a unaligned store. + if (AllowUnalign) + VT = MVT::iAny; + } else { + int FI = cast<FrameIndexSDNode>(Dst)->getIndex(); + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + if (MFI->isFixedObjectIndex(FI)) { + // Can't change destination alignment. It requires a unaligned store. + if (AllowUnalign) + VT = MVT::iAny; + } else { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI) < NewAlign) + MFI->setObjectAlignment(FI, NewAlign); + Align = NewAlign; + } + } + } + } + + if (VT == MVT::iAny) { + if (AllowUnalign) { + VT = MVT::i64; + } else { + switch (Align & 7) { + case 0: VT = MVT::i64; break; + case 4: VT = MVT::i32; break; + case 2: VT = MVT::i16; break; + default: VT = MVT::i8; break; + } + } + + MVT LVT = MVT::i64; + while (!TLI.isTypeLegal(LVT)) + LVT = (MVT::SimpleValueType)(LVT.getSimpleVT() - 1); + assert(LVT.isInteger()); + + if (VT.bitsGT(LVT)) + VT = LVT; + } + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned VTSize = VT.getSizeInBits() / 8; + while (VTSize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. 
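MeetsMaxMemopRequirement above finishes by greedily splitting Size into chunks, dropping to the next smaller type whenever the current one no longer fits, and giving up if the operation count exceeds the target's limit. A sketch of that chunking with byte sizes instead of MVTs (the 8-byte starting width is just an example value):

    #include <cstdint>
    #include <vector>

    bool chunkMemop(uint64_t Size, unsigned Limit, std::vector<unsigned> &Chunks,
                    unsigned WidestBytes = 8) {
      unsigned Cur = WidestBytes;
      while (Size != 0) {
        while (Cur > Size)
          Cur >>= 1;                 // fall back to the next smaller power of two
        if (Chunks.size() + 1 > Limit)
          return false;              // too many memory ops; use a libcall instead
        Chunks.push_back(Cur);
        Size -= Cur;
      }
      return true;                   // e.g. Size = 15 -> {8, 4, 2, 1}
    }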
+ if (VT.isVector()) { + VT = MVT::i64; + while (!TLI.isTypeLegal(VT)) + VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); + VTSize = VT.getSizeInBits() / 8; + } else { + VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); + VTSize >>= 1; + } + } + + if (++NumMemOps > Limit) + return false; + MemOps.push_back(VT); + Size -= VTSize; + } + + return true; +} + +static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff){ + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Expand memcpy to a series of load and store ops if the size operand falls + // below a certain threshold. + std::vector<MVT> MemOps; + uint64_t Limit = -1ULL; + if (!AlwaysInline) + Limit = TLI.getMaxStoresPerMemcpy(); + unsigned DstAlign = Align; // Destination alignment can change. + std::string Str; + bool CopyFromStr; + if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign, + Str, CopyFromStr, DAG, TLI)) + return SDValue(); + + + bool isZeroStr = CopyFromStr && Str.empty(); + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + uint64_t SrcOff = 0, DstOff = 0; + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + if (CopyFromStr && (isZeroStr || !VT.isVector())) { + // It's unlikely a store of a vector immediate can be done in a single + // instruction. It would require a load from a constantpool first. + // We also handle store a vector with all zero's. + // FIXME: Handle other cases where store of vector immediate is done in + // a single instruction. + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, false, DstAlign); + } else { + Value = DAG.getLoad(VT, dl, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcSV, SrcSVOff + SrcOff, false, Align); + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, false, DstAlign); + } + OutChains.push_back(Store); + SrcOff += VTSize; + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff){ + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Expand memmove to a series of load and store ops if the size operand falls + // below a certain threshold. + std::vector<MVT> MemOps; + uint64_t Limit = -1ULL; + if (!AlwaysInline) + Limit = TLI.getMaxStoresPerMemmove(); + unsigned DstAlign = Align; // Destination alignment can change. 
+ std::string Str; + bool CopyFromStr; + if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign, + Str, CopyFromStr, DAG, TLI)) + return SDValue(); + + uint64_t SrcOff = 0, DstOff = 0; + + SmallVector<SDValue, 8> LoadValues; + SmallVector<SDValue, 8> LoadChains; + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + Value = DAG.getLoad(VT, dl, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcSV, SrcSVOff + SrcOff, false, Align); + LoadValues.push_back(Value); + LoadChains.push_back(Value.getValue(1)); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + OutChains.clear(); + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + Store = DAG.getStore(Chain, dl, LoadValues[i], + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, false, DstAlign); + OutChains.push_back(Store); + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, + const Value *DstSV, uint64_t DstSVOff) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Expand memset to a series of load/store ops if the size operand + // falls below a certain threshold. + std::vector<MVT> MemOps; + std::string Str; + bool CopyFromStr; + if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(), + Size, Align, Str, CopyFromStr, DAG, TLI)) + return SDValue(); + + SmallVector<SDValue, 8> OutChains; + uint64_t DstOff = 0; + + unsigned NumMemOps = MemOps.size(); + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value = getMemsetValue(Src, VT, DAG, dl); + SDValue Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff); + OutChains.push_back(Store); + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff) { + + // Check to see if we should lower the memcpy to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memcpy with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), + Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memcpy with target-specific + // code. If the target chooses to do this, this is the next best. 
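Note the ordering in getMemmoveLoadsAndStores above: unlike the memcpy expansion, every load is emitted and token-factored together before any store, because memmove's source and destination may overlap and an early store could clobber bytes that have not been read yet. The behavioural requirement it is preserving, in plain C++:

    #include <cassert>
    #include <cstring>

    void checkOverlappingMove() {
      char Buf[9] = "abcdefgh";
      std::memmove(Buf + 2, Buf, 4);                // overlapping ranges
      assert(std::memcmp(Buf, "ababcdgh", 8) == 0); // "abcd" arrives intact
    }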
+ SDValue Result = + TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, + AlwaysInline, + DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. + if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, true, + DstSV, DstSVOff, SrcSV, SrcSVOff); + } + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, false, CallingConv::C, false, + getExternalSymbol("memcpy", TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff) { + + // Check to see if we should lower the memmove to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memmove with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), + Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memmove with target-specific + // code. If the target chooses to do this, this is the next best. + SDValue Result = + TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, + DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, false, CallingConv::C, false, + getExternalSymbol("memmove", TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, + const Value *DstSV, uint64_t DstSVOff) { + + // Check to see if we should lower the memset to stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memset with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + Align, DstSV, DstSVOff); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memset with target-specific + // code. If the target chooses to do this, this is the next best. 
+ SDValue Result = + TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, + DstSV, DstSVOff); + if (Result.getNode()) + return Result; + + // Emit a library call. + const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = IntPtrTy; + Args.push_back(Entry); + // Extend or truncate the argument to be an i32 value for the call. + if (Src.getValueType().bitsGT(MVT::i32)) + Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src); + else + Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true; + Args.push_back(Entry); + Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false; + Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, false, CallingConv::C, false, + getExternalSymbol("memset", TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Cmp, + SDValue Swp, const Value* PtrVal, + unsigned Alignment) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + + MVT VT = Cmp.getValueType(); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(MemVT); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); + new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, + Chain, Ptr, Cmp, Swp, PtrVal, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + const Value* PtrVal, + unsigned Alignment) { + assert((Opcode == ISD::ATOMIC_LOAD_ADD || + Opcode == ISD::ATOMIC_LOAD_SUB || + Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_OR || + Opcode == ISD::ATOMIC_LOAD_XOR || + Opcode == ISD::ATOMIC_LOAD_NAND || + Opcode == ISD::ATOMIC_LOAD_MIN || + Opcode == ISD::ATOMIC_LOAD_MAX || + Opcode == ISD::ATOMIC_LOAD_UMIN || + Opcode == ISD::ATOMIC_LOAD_UMAX || + Opcode == ISD::ATOMIC_SWAP) && + "Invalid Atomic Op"); + + MVT VT = Val.getValueType(); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(MemVT); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr, Val}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); + new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, + Chain, Ptr, Val, PtrVal, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// getMergeValues - Create a MERGE_VALUES node from the given operands. +/// Allowed to return something different (and simpler) if Simplify is true. 
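+/// A typical (purely illustrative) use merges a value and its chain into a
+/// single multi-result node:
+///   SDValue Parts[] = { Val, Chain };
+///   SDValue Merged = DAG.getMergeValues(Parts, 2, dl);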
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, + DebugLoc dl) { + if (NumOps == 1) + return Ops[0]; + + SmallVector<MVT, 4> VTs; + VTs.reserve(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + VTs.push_back(Ops[i].getValueType()); + return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps), + Ops, NumOps); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, + const MVT *VTs, unsigned NumVTs, + const SDValue *Ops, unsigned NumOps, + MVT MemVT, const Value *srcValue, int SVOff, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, + MemVT, srcValue, SVOff, Align, Vol, + ReadMem, WriteMem); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + MVT MemVT, const Value *srcValue, int SVOff, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + // Memoize the node unless it returns a flag. + MemIntrinsicSDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, + srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, + srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + } + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs, + bool IsTailCall, bool IsInreg, SDVTList VTs, + const SDValue *Operands, unsigned NumOperands) { + // Do not include isTailCall in the folding set profile. + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands); + ID.AddInteger(CallingConv); + ID.AddInteger(IsVarArgs); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + // Instead of including isTailCall in the folding set, we just + // set the flag of the existing node. + if (!IsTailCall) + cast<CallSDNode>(E)->setNotTailCall(); + return SDValue(E, 0); + } + SDNode *N = NodeAllocator.Allocate<CallSDNode>(); + new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg, + VTs, Operands, NumOperands); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, + ISD::LoadExtType ExtType, MVT VT, SDValue Chain, + SDValue Ptr, SDValue Offset, + const Value *SV, int SVOffset, MVT EVT, + bool isVolatile, unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(VT); + + if (VT == EVT) { + ExtType = ISD::NON_EXTLOAD; + } else if (ExtType == ISD::NON_EXTLOAD) { + assert(VT == EVT && "Non-extending load from different memory type!"); + } else { + // Extending load. 
+ if (VT.isVector()) + assert(EVT.getVectorNumElements() == VT.getVectorNumElements() && + "Invalid vector extload!"); + else + assert(EVT.bitsLT(VT) && + "Should only be an extending load, not truncating!"); + assert((ExtType == ISD::EXTLOAD || VT.isInteger()) && + "Cannot sign/zero extend a FP/Vector load!"); + assert(VT.isInteger() == EVT.isInteger() && + "Cannot convert from FP to Int or Int -> FP!"); + } + + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.getOpcode() == ISD::UNDEF) && + "Unindexed load with an offset!"); + + SDVTList VTs = Indexed ? + getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Offset }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); + ID.AddInteger(EVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, isVolatile, Alignment)); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<LoadSDNode>(); + new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, EVT, SV, SVOffset, + Alignment, isVolatile); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl, + SDValue Chain, SDValue Ptr, + const Value *SV, int SVOffset, + bool isVolatile, unsigned Alignment) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, + SV, SVOffset, VT, isVolatile, Alignment); +} + +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT, + SDValue Chain, SDValue Ptr, + const Value *SV, + int SVOffset, MVT EVT, + bool isVolatile, unsigned Alignment) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, + SV, SVOffset, EVT, isVolatile, Alignment); +} + +SDValue +SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM) { + LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); + assert(LD->getOffset().getOpcode() == ISD::UNDEF && + "Load is already a indexed load!"); + return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), + LD->getChain(), Base, Offset, LD->getSrcValue(), + LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->isVolatile(), LD->getAlignment()); +} + +SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, const Value *SV, int SVOffset, + bool isVolatile, unsigned Alignment) { + MVT VT = Val.getValueType(); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(VT); + + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = { Chain, Val, Ptr, Undef }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, + isVolatile, Alignment)); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, + VT, SV, SVOffset, Alignment, isVolatile); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, const Value *SV, + int SVOffset, MVT SVT, + bool isVolatile, unsigned Alignment) { + MVT VT = Val.getValueType(); + + if (VT == SVT) 
+ return getStore(Chain, dl, Val, Ptr, SV, SVOffset, isVolatile, Alignment); + + assert(VT.bitsGT(SVT) && "Not a truncation?"); + assert(VT.isInteger() == SVT.isInteger() && + "Can't do FP-INT conversion!"); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(VT); + + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = { Chain, Val, Ptr, Undef }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + ID.AddInteger(SVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, + isVolatile, Alignment)); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, + SVT, SV, SVOffset, Alignment, isVolatile); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM) { + StoreSDNode *ST = cast<StoreSDNode>(OrigStore); + assert(ST->getOffset().getOpcode() == ISD::UNDEF && + "Store is already a indexed store!"); + SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); + SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); + new (N) StoreSDNode(Ops, dl, VTs, AM, + ST->isTruncatingStore(), ST->getMemoryVT(), + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getAlignment(), ST->isVolatile()); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getVAArg(MVT VT, DebugLoc dl, + SDValue Chain, SDValue Ptr, + SDValue SV) { + SDValue Ops[] = { Chain, Ptr, SV }; + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + const SDUse *Ops, unsigned NumOps) { + switch (NumOps) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + // Copy from an SDUse array into an SDValue array for use with + // the regular getNode logic. 
+ SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps); + return getNode(Opcode, DL, VT, &NewOps[0], NumOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + const SDValue *Ops, unsigned NumOps) { + switch (NumOps) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + switch (Opcode) { + default: break; + case ISD::SELECT_CC: { + assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); + assert(Ops[0].getValueType() == Ops[1].getValueType() && + "LHS and RHS of condition must have same type!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "True and False arms of SelectCC must have same type!"); + assert(Ops[2].getValueType() == VT && + "select_cc node must be of same type as true and false value!"); + break; + } + case ISD::BR_CC: { + assert(NumOps == 5 && "BR_CC takes 5 operands!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "LHS/RHS of comparison should match types!"); + break; + } + } + + // Memoize nodes. + SDNode *N; + SDVTList VTs = getVTList(VT); + + if (VT != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); + void *IP = 0; + + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, + const std::vector<MVT> &ResultTys, + const SDValue *Ops, unsigned NumOps) { + return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), + Ops, NumOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, + const MVT *VTs, unsigned NumVTs, + const SDValue *Ops, unsigned NumOps) { + if (NumVTs == 1) + return getNode(Opcode, DL, VTs[0], Ops, NumOps); + return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + const SDValue *Ops, unsigned NumOps) { + if (VTList.NumVTs == 1) + return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); + + switch (Opcode) { + // FIXME: figure out how to safely handle things like + // int foo(int x) { return 1 << (x & 255); } + // int bar() { return foo(256); } +#if 0 + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: + case ISD::SHL_PARTS: + if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1) + return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); + else if (N3.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) { + // If the and is only masking out bits that cannot effect the shift, + // eliminate the and. + unsigned NumBits = VT.getSizeInBits()*2; + if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) + return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); + } + break; +#endif + } + + // Memoize the node unless it returns a flag. 
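+ // Nodes whose last result type is MVT::Flag are deliberately kept out of
+ // the CSE map, since a flag result must not be shared between users.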
+ SDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + if (NumOps == 1) { + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = NodeAllocator.Allocate<TernarySDNode>(); + new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + } else { + N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + CSEMap.InsertNode(N, IP); + } else { + if (NumOps == 1) { + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = NodeAllocator.Allocate<TernarySDNode>(); + new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + } else { + N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + } + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) { + return getNode(Opcode, DL, VTList, 0, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1) { + SDValue Ops[] = { N1 }; + return getNode(Opcode, DL, VTList, Ops, 1); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2) { + SDValue Ops[] = { N1, N2 }; + return getNode(Opcode, DL, VTList, Ops, 2); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3) { + SDValue Ops[] = { N1, N2, N3 }; + return getNode(Opcode, DL, VTList, Ops, 3); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VTList, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VTList, Ops, 5); +} + +SDVTList SelectionDAG::getVTList(MVT VT) { + return makeVTList(SDNode::getValueTypeList(VT), 1); +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) + return *I; + + MVT *Array = Allocator.Allocate<MVT>(2); + Array[0] = VT1; + Array[1] = VT2; + SDVTList Result = makeVTList(Array, 2); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && + I->VTs[2] == VT3) + return *I; + + MVT *Array = Allocator.Allocate<MVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + SDVTList Result = makeVTList(Array, 3); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) { + 
for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3 && I->VTs[3] == VT4)
+ return *I;
+
+ MVT *Array = Allocator.Allocate<MVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ SDVTList Result = makeVTList(Array, 4);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) {
+ switch (NumVTs) {
+ case 0: assert(0 && "Cannot have nodes without results!");
+ case 1: return getVTList(VTs[0]);
+ case 2: return getVTList(VTs[0], VTs[1]);
+ case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+ default: break;
+ }
+
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I) {
+ if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+ continue;
+
+ bool NoMatch = false;
+ for (unsigned i = 2; i != NumVTs; ++i)
+ if (VTs[i] != I->VTs[i]) {
+ NoMatch = true;
+ break;
+ }
+ if (!NoMatch)
+ return *I;
+ }
+
+ MVT *Array = Allocator.Allocate<MVT>(NumVTs);
+ std::copy(VTs, VTs+NumVTs, Array);
+ SDVTList Result = makeVTList(Array, NumVTs);
+ VTList.push_back(Result);
+ return Result;
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node; instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return InN; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return InN; +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) { + SDValue Ops[] = { Op1, Op2, Op3 }; + return UpdateNodeOperands(N, Ops, 3); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return UpdateNodeOperands(N, Ops, 4); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4, SDValue Op5) { + SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; + return UpdateNodeOperands(N, Ops, 5); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { + SDNode *N = InN.getNode(); + assert(N->getNumOperands() == NumOps && + "Update with wrong number of operands"); + + // Check to see if there is no change. + bool AnyChange = false; + for (unsigned i = 0; i != NumOps; ++i) { + if (Ops[i] != N->getOperand(i)) { + AnyChange = true; + break; + } + } + + // No operands changed, just return the input node. + if (!AnyChange) return InN; + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) + return SDValue(Existing, InN.getResNo()); + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + for (unsigned i = 0; i != NumOps; ++i) + if (N->OperandList[i] != Ops[i]) + N->OperandList[i].set(Ops[i]); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return InN; +} + +/// DropOperands - Release the operands and set this node to have +/// zero operands. +void SDNode::DropOperands() { + // Unlike the code in MorphNodeTo that does this, we don't need to + // watch for dead nodes here. + for (op_iterator I = op_begin(), E = op_end(); I != E; ) { + SDUse &Use = *I++; + Use.set(SDValue()); + } +} + +/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a +/// machine opcode. 
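+/// For example (purely illustrative; MyTargetOpc stands in for a real machine
+/// opcode), an instruction selector could rewrite a node in place as:
+///   CurDAG->SelectNodeTo(N, MyTargetOpc, MVT::i32, N->getOperand(0));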
+/// +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, 0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, MVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT) { + SDVTList VTs = getVTList(VT); + return MorphNodeTo(N, Opc, VTs, 0, 0); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return MorphNodeTo(N, Opc, VTs, Ops, 1); +} + +SDNode 
*SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return MorphNodeTo(N, Opc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return MorphNodeTo(N, Opc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return MorphNodeTo(N, Opc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return MorphNodeTo(N, Opc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return MorphNodeTo(N, Opc, VTs, Ops, 3); +} + +/// MorphNodeTo - These *mutate* the specified node to have the specified +/// return type, opcode, and operands. +/// +/// Note that MorphNodeTo returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. Note that the DebugLoc need not be the same. +/// +/// Using MorphNodeTo is faster than creating a new node and swapping it in +/// with ReplaceAllUsesWith both because it often avoids allocating a new +/// node, and because it doesn't require CSE recalculation for any of +/// the node's users. +/// +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + // If an identical node already exists, use it. + void *IP = 0; + if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + } + + if (!RemoveNodeFromCSEMaps(N)) + IP = 0; + + // Start the morphing. + N->NodeType = Opc; + N->ValueList = VTs.VTs; + N->NumValues = VTs.NumVTs; + + // Clear the operands list, updating used nodes to remove this from their + // use list. Keep track of any operands that become dead as a result. + SmallPtrSet<SDNode*, 16> DeadNodeSet; + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Used = Use.getNode(); + Use.set(SDValue()); + if (Used->use_empty()) + DeadNodeSet.insert(Used); + } + + // If NumOps is larger than the # of operands we currently have, reallocate + // the operand list. 
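+ // When the node shrinks, the existing operand array is simply reused;
+ // only growth forces a fresh allocation below.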
+ if (NumOps > N->NumOperands) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + + if (N->isMachineOpcode()) { + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + N->OperandList = OperandAllocator.Allocate<SDUse>(NumOps); + N->OperandsNeedDelete = false; + } else { + N->OperandList = new SDUse[NumOps]; + N->OperandsNeedDelete = true; + } + } + + // Assign the new operands. + N->NumOperands = NumOps; + for (unsigned i = 0, e = NumOps; i != e; ++i) { + N->OperandList[i].setUser(N); + N->OperandList[i].setInitial(Ops[i]); + } + + // Delete any nodes that are still dead after adding the uses for the + // new operands. + SmallVector<SDNode *, 16> DeadNodes; + for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), + E = DeadNodeSet.end(); I != E; ++I) + if ((*I)->use_empty()) + DeadNodes.push_back(*I); + RemoveDeadNodes(DeadNodes); + + if (IP) + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + + +/// getTargetNode - These are used for target selectors to create a new node +/// with specified return type(s), target opcode, and operands. +/// +/// Note that getTargetNode returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT) { + return getNode(~Opcode, dl, VT).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1) { + return getNode(~Opcode, dl, VT, Op1).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1, SDValue Op2) { + return getNode(~Opcode, dl, VT, Op1, Op2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1, SDValue Op2, + SDValue Op3) { + return getNode(~Opcode, dl, VT, Op1, Op2, Op3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + const SDValue *Ops, unsigned NumOps) { + return getNode(~Opcode, dl, VT, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Op; + return getNode(~Opcode, dl, VTs, &Op, 0).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + return getNode(~Opcode, dl, VTs, &Op1, 1).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue 
Ops[] = { Op1, Op2 }; + return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, MVT VT3, MVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + const std::vector<MVT> &ResultTys, + const SDValue *Ops, unsigned NumOps) { + return getNode(~Opcode, dl, ResultTys, Ops, NumOps).getNode(); +} + +/// getNodeIfExists - Get the specified node if it's already available, or +/// else return NULL. +SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, + const SDValue *Ops, unsigned NumOps) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return E; + } + return NULL; +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes From has a single result value. +/// +void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, + DAGUpdateListener *UpdateListener) { + SDNode *From = FromN.getNode(); + assert(From->getNumValues() == 1 && FromN.getResNo() == 0 && + "Cannot replace with this method!"); + assert(From != To.getNode() && "Cannot replace uses of with self"); + + // Iterate over all the existing uses of From. New uses will be added + // to the beginning of the use list, which we avoid visiting. + // This specifically avoids visiting uses of From that arise while the + // replacement is happening, because any such uses would be the result + // of CSE: If an existing node looks like From after one of its operands + // is replaced by To, we don't want to replace of all its users with To + // too. See PR3018 for more info. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes that for each value of From, there is a +/// corresponding value in To in the same position with the same type. 
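+/// For example (purely illustrative), a combine that has built a replacement
+/// node NewN for OldN with identical result types might simply call
+///   DAG.ReplaceAllUsesWith(OldN, NewN, 0);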
+/// +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, + DAGUpdateListener *UpdateListener) { +#ifndef NDEBUG + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + assert((!From->hasAnyUseOfValue(i) || + From->getValueType(i) == To->getValueType(i)) && + "Cannot use this version of ReplaceAllUsesWith!"); +#endif + + // Handle the trivial case. + if (From == To) + return; + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.setNode(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version can replace From with any result values. To must match the +/// number and types of values returned by From. +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, + const SDValue *To, + DAGUpdateListener *UpdateListener) { + if (From->getNumValues() == 1) // Handle the simple case efficiently. + return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener); + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + const SDValue &ToOp = To[Use.getResNo()]; + ++UI; + Use.set(ToOp); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The Deleted +/// vector is handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, + DAGUpdateListener *UpdateListener){ + // Handle the really simple, really trivial case efficiently. + if (From == To) return; + + // Handle the simple, trivial, case efficiently. + if (From.getNode()->getNumValues() == 1) { + ReplaceAllUsesWith(From, To, UpdateListener); + return; + } + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. 
+ SDNode::use_iterator UI = From.getNode()->use_begin(), + UE = From.getNode()->use_end(); + while (UI != UE) { + SDNode *User = *UI; + bool UserRemovedFromCSEMaps = false; + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + + // Skip uses of different values from the same node. + if (Use.getResNo() != From.getResNo()) { + ++UI; + continue; + } + + // If this node hasn't been modified yet, it's still in the CSE maps, + // so remove its old self from the CSE maps. + if (!UserRemovedFromCSEMaps) { + RemoveNodeFromCSEMaps(User); + UserRemovedFromCSEMaps = true; + } + + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // We are iterating over all uses of the From node, so if a use + // doesn't use the specific value, no changes are made. + if (!UserRemovedFromCSEMaps) + continue; + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +namespace { + /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith + /// to record information about a use. + struct UseMemo { + SDNode *User; + unsigned Index; + SDUse *Use; + }; + + /// operator< - Sort Memos by User. + bool operator<(const UseMemo &L, const UseMemo &R) { + return (intptr_t)L.User < (intptr_t)R.User; + } +} + +/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The same value +/// may appear in both the From and To list. The Deleted vector is +/// handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, + const SDValue *To, + unsigned Num, + DAGUpdateListener *UpdateListener){ + // Handle the simple, trivial case efficiently. + if (Num == 1) + return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener); + + // Read up all the uses and make records of them. This helps + // processing new uses that are introduced during the + // replacement process. + SmallVector<UseMemo, 4> Uses; + for (unsigned i = 0; i != Num; ++i) { + unsigned FromResNo = From[i].getResNo(); + SDNode *FromNode = From[i].getNode(); + for (SDNode::use_iterator UI = FromNode->use_begin(), + E = FromNode->use_end(); UI != E; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == FromResNo) { + UseMemo Memo = { *UI, i, &Use }; + Uses.push_back(Memo); + } + } + } + + // Sort the uses, so that all the uses from a given User are together. + std::sort(Uses.begin(), Uses.end()); + + for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); + UseIndex != UseIndexEnd; ) { + // We know that this user uses some value of From. If it is the right + // value, update it. + SDNode *User = Uses[UseIndex].User; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // The Uses array is sorted, so all the uses for a given User + // are next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. 
+ do { + unsigned i = Uses[UseIndex].Index; + SDUse &Use = *Uses[UseIndex].Use; + ++UseIndex; + + Use.set(To[i]); + } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG +/// based on their topological order. It returns the maximum id and a vector +/// of the SDNodes* in assigned order by reference. +unsigned SelectionDAG::AssignTopologicalOrder() { + + unsigned DAGSize = 0; + + // SortedPos tracks the progress of the algorithm. Nodes before it are + // sorted, nodes after it are unsorted. When the algorithm completes + // it is at the end of the list. + allnodes_iterator SortedPos = allnodes_begin(); + + // Visit all the nodes. Move nodes with no operands to the front of + // the list immediately. Annotate nodes that do have operands with their + // operand count. Before we do this, the Node Id fields of the nodes + // may contain arbitrary values. After, the Node Id fields for nodes + // before SortedPos will contain the topological sort index, and the + // Node Id fields for nodes At SortedPos and after will contain the + // count of outstanding operands. + for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { + SDNode *N = I++; + unsigned Degree = N->getNumOperands(); + if (Degree == 0) { + // A node with no uses, add it to the result array immediately. + N->setNodeId(DAGSize++); + allnodes_iterator Q = N; + if (Q != SortedPos) + SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); + ++SortedPos; + } else { + // Temporarily use the Node Id as scratch space for the degree count. + N->setNodeId(Degree); + } + } + + // Visit all the nodes. As we iterate, moves nodes into sorted order, + // such that by the time the end is reached all nodes will be sorted. + for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { + SDNode *N = I; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDNode *P = *UI; + unsigned Degree = P->getNodeId(); + --Degree; + if (Degree == 0) { + // All of P's operands are sorted, so P may sorted now. + P->setNodeId(DAGSize++); + if (P != SortedPos) + SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P)); + ++SortedPos; + } else { + // Update P's outstanding operand count. 
+ P->setNodeId(Degree); + } + } + } + + assert(SortedPos == AllNodes.end() && + "Topological sort incomplete!"); + assert(AllNodes.front().getOpcode() == ISD::EntryToken && + "First node in topological sort is not the entry token!"); + assert(AllNodes.front().getNodeId() == 0 && + "First node in topological sort has non-zero id!"); + assert(AllNodes.front().getNumOperands() == 0 && + "First node in topological sort has operands!"); + assert(AllNodes.back().getNodeId() == (int)DAGSize-1 && + "Last node in topologic sort has unexpected id!"); + assert(AllNodes.back().use_empty() && + "Last node in topologic sort has users!"); + assert(DAGSize == allnodes_size() && "Node count mismatch!"); + return DAGSize; +} + + + +//===----------------------------------------------------------------------===// +// SDNode Class +//===----------------------------------------------------------------------===// + +HandleSDNode::~HandleSDNode() { + DropOperands(); +} + +GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA, + MVT VT, int64_t o) + : SDNode(isa<GlobalVariable>(GA) && + cast<GlobalVariable>(GA)->isThreadLocal() ? + // Thread Local + (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) : + // Non Thread Local + (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress), + DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o) { + TheGlobal = const_cast<GlobalValue*>(GA); +} + +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT memvt, + const Value *srcValue, int SVO, + unsigned alignment, bool vol) + : SDNode(Opc, dl, VTs), MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment); + assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!"); + assert(getAlignment() == alignment && "Alignment representation error!"); + assert(isVolatile() == vol && "Volatile representation error!"); +} + +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, + const SDValue *Ops, + unsigned NumOps, MVT memvt, const Value *srcValue, + int SVO, unsigned alignment, bool vol) + : SDNode(Opc, dl, VTs, Ops, NumOps), + MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment); + assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!"); + assert(getAlignment() == alignment && "Alignment representation error!"); + assert(isVolatile() == vol && "Volatile representation error!"); +} + +/// getMemOperand - Return a MachineMemOperand object describing the memory +/// reference performed by this memory reference. 
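+/// The flags are derived from the kind of node (load, store, atomic, or
+/// memory intrinsic), the size from the memory value type, and frame-index
+/// addresses are described with a fixed-stack pseudo source value.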
+MachineMemOperand MemSDNode::getMemOperand() const { + int Flags = 0; + if (isa<LoadSDNode>(this)) + Flags = MachineMemOperand::MOLoad; + else if (isa<StoreSDNode>(this)) + Flags = MachineMemOperand::MOStore; + else if (isa<AtomicSDNode>(this)) { + Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + } + else { + const MemIntrinsicSDNode* MemIntrinNode = dyn_cast<MemIntrinsicSDNode>(this); + assert(MemIntrinNode && "Unknown MemSDNode opcode!"); + if (MemIntrinNode->readMem()) Flags |= MachineMemOperand::MOLoad; + if (MemIntrinNode->writeMem()) Flags |= MachineMemOperand::MOStore; + } + + int Size = (getMemoryVT().getSizeInBits() + 7) >> 3; + if (isVolatile()) Flags |= MachineMemOperand::MOVolatile; + + // Check if the memory reference references a frame index + const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(getBasePtr().getNode()); + if (!getSrcValue() && FI) + return MachineMemOperand(PseudoSourceValue::getFixedStack(FI->getIndex()), + Flags, 0, Size, getAlignment()); + else + return MachineMemOperand(getSrcValue(), Flags, getSrcValueOffset(), + Size, getAlignment()); +} + +/// Profile - Gather unique data for the node. +/// +void SDNode::Profile(FoldingSetNodeID &ID) const { + AddNodeIDNode(ID, this); +} + +/// getValueTypeList - Return a pointer to the specified value type. +/// +const MVT *SDNode::getValueTypeList(MVT VT) { + if (VT.isExtended()) { + static std::set<MVT, MVT::compareRawBits> EVTs; + return &(*EVTs.insert(VT).first); + } else { + static MVT VTs[MVT::LAST_VALUETYPE]; + VTs[VT.getSimpleVT()] = VT; + return &VTs[VT.getSimpleVT()]; + } +} + +/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the +/// indicated value. This method ignores uses of other values defined by this +/// operation. +bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + // TODO: Only iterate over uses of a given value of the node + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { + if (UI.getUse().getResNo() == Value) { + if (NUses == 0) + return false; + --NUses; + } + } + + // Found exactly the right number of uses? + return NUses == 0; +} + + +/// hasAnyUseOfValue - Return true if there are any use of the indicated +/// value. This method ignores uses of other values defined by this operation. +bool SDNode::hasAnyUseOfValue(unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) + if (UI.getUse().getResNo() == Value) + return true; + + return false; +} + + +/// isOnlyUserOf - Return true if this node is the only use of N. +/// +bool SDNode::isOnlyUserOf(SDNode *N) const { + bool Seen = false; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDNode *User = *I; + if (User == this) + Seen = true; + else + return false; + } + + return Seen; +} + +/// isOperand - Return true if this node is an operand of N. 
+/// +bool SDValue::isOperandOf(SDNode *N) const { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (*this == N->getOperand(i)) + return true; + return false; +} + +bool SDNode::isOperandOf(SDNode *N) const { + for (unsigned i = 0, e = N->NumOperands; i != e; ++i) + if (this == N->OperandList[i].getNode()) + return true; + return false; +} + +/// reachesChainWithoutSideEffects - Return true if this operand (which must +/// be a chain) reaches the specified operand without crossing any +/// side-effecting instructions. In practice, this looks through token +/// factors and non-volatile loads. In order to remain efficient, this only +/// looks a couple of nodes in, it does not do an exhaustive search. +bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, + unsigned Depth) const { + if (*this == Dest) return true; + + // Don't search too deeply, we just want to be able to see through + // TokenFactor's etc. + if (Depth == 0) return false; + + // If this is a token factor, all inputs to the TF happen in parallel. If any + // of the operands of the TF reach dest, then we can do the xform. + if (getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) + return true; + return false; + } + + // Loads don't have side effects, look through them. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { + if (!Ld->isVolatile()) + return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); + } + return false; +} + + +static void findPredecessor(SDNode *N, const SDNode *P, bool &found, + SmallPtrSet<SDNode *, 32> &Visited) { + if (found || !Visited.insert(N)) + return; + + for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) { + SDNode *Op = N->getOperand(i).getNode(); + if (Op == P) { + found = true; + return; + } + findPredecessor(Op, P, found, Visited); + } +} + +/// isPredecessorOf - Return true if this node is a predecessor of N. This node +/// is either an operand of N or it can be reached by recursively traversing +/// up the operands. +/// NOTE: this is an expensive method. Use it carefully. 
+bool SDNode::isPredecessorOf(SDNode *N) const { + SmallPtrSet<SDNode *, 32> Visited; + bool found = false; + findPredecessor(N, this, found, Visited); + return found; +} + +uint64_t SDNode::getConstantOperandVal(unsigned Num) const { + assert(Num < NumOperands && "Invalid child # of SDNode!"); + return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); +} + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->get(getMachineOpcode()).getName(); + return "<<Unknown Machine Node>>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node>>"; + } + return "<<Unknown Node>>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: + return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MEMOPERAND: return "MemOperand"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::ARG_FLAGS: return "ArgFlags"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IID = cast<ConstantSDNode>(getOperand(0))->getZExtValue(); + return Intrinsic::getName((Intrinsic::ID)IID); + } + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned IID = cast<ConstantSDNode>(getOperand(1))->getZExtValue(); + return Intrinsic::getName((Intrinsic::ID)IID); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case 
ISD::TargetConstantFP:return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::DBG_LABEL: return "dbg_label"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::DECLARE: return "declare"; + case ISD::HANDLENODE: return "handlenode"; + case ISD::FORMAL_ARGUMENTS: return "formal_arguments"; + case ISD::CALL: return "call"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FPOWI: return "fpowi"; + case ISD::FPOW: return "fpow"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + + case ISD::SETCC: return "setcc"; + case ISD::VSETCC: return "vsetcc"; + case ISD::SELECT: return "select"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. 
+ case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BIT_CONVERT: return "bit_convert"; + + case ISD::CONVERT_RNDSAT: { + switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { + default: assert(0 && "Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::RET: return "ret"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTLZ: return "ctlz"; + + // Debug info + case ISD::DBG_STOPPOINT: return "dbg_stoppoint"; + case ISD::DEBUG_LOC: return "debug_loc"; + + // Trampolines + case ISD::TRAMPOLINE: return "trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: assert(0 && "Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: + return ""; + case ISD::PRE_INC: + return "<pre-inc>"; + case ISD::PRE_DEC: + return "<pre-dec>"; + case ISD::POST_INC: + return "<post-inc>"; + case ISD::POST_DEC: + return "<post-dec>"; + } +} + +std::string ISD::ArgFlagsTy::getArgFlagsString() { + std::string S = "< "; + + if 
(isZExt()) + S += "zext "; + if (isSExt()) + S += "sext "; + if (isInReg()) + S += "inreg "; + if (isSRet()) + S += "sret "; + if (isByVal()) + S += "byval "; + if (isNest()) + S += "nest "; + if (getByValAlign()) + S += "byval-align:" + utostr(getByValAlign()) + " "; + if (getOrigAlign()) + S += "orig-align:" + utostr(getOrigAlign()) + " "; + if (getByValSize()) + S += "byval-size:" + utostr(getByValSize()) + " "; + return S + ">"; +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(errs(), G); +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getMVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(this); + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } + + if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + if (G && R->getReg() && + TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + OS << " " << G->getTarget().getRegisterInfo()->getName(R->getReg()); + } else { + OS << " #" << R->getReg(); + } + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MemOperandSDNode *M = 
dyn_cast<MemOperandSDNode>(this)) { + if (M->MO.getValue()) + OS << "<" << M->MO.getValue() << ":" << M->MO.getOffset() << ">"; + else + OS << "<null:" << M->MO.getOffset() << ">"; + } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(this)) { + OS << N->getArgFlags().getArgFlagsString(); + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getMVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + const Value *SrcValue = LD->getSrcValue(); + int SrcOffset = LD->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << " <anyext "; break; + case ISD::SEXTLOAD: OS << " <sext "; break; + case ISD::ZEXTLOAD: OS << " <zext "; break; + } + if (doExt) + OS << LD->getMemoryVT().getMVTString() << ">"; + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << " " << AM; + if (LD->isVolatile()) + OS << " <volatile>"; + OS << " alignment=" << LD->getAlignment(); + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + const Value *SrcValue = ST->getSrcValue(); + int SrcOffset = ST->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + + if (ST->isTruncatingStore()) + OS << " <trunc " << ST->getMemoryVT().getMVTString() << ">"; + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << " " << AM; + if (ST->isVolatile()) + OS << " <volatile>"; + OS << " alignment=" << ST->getAlignment(); + } else if (const AtomicSDNode* AT = dyn_cast<AtomicSDNode>(this)) { + const Value *SrcValue = AT->getSrcValue(); + int SrcOffset = AT->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + if (AT->isVolatile()) + OS << " <volatile>"; + OS << " alignment=" << AT->getAlignment(); + } +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + OS << " "; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + cerr << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + + + cerr << "\n" << std::string(indent, ' '); + N->dump(G); +} + +void SelectionDAG::dump() const { + cerr << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + + cerr << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, 
return now. + return; + // Dump the current SDNode, but don't end the line yet. + OS << std::string(indent, ' '); + N->printr(OS, G); + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + if (i) OS << ","; + OS << " "; + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + OS << "\n"; + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(errs(), this, 0, 0, once); +} + + +// getAddressSpace - Return the address space this GlobalAddress belongs to. +unsigned GlobalAddressSDNode::getAddressSpace() const { + return getGlobal()->getType()->getAddressSpace(); +} + + +const Type *ConstantPoolSDNode::getType() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getType(); + return Val.ConstVal->getType(); +} + +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, + APInt &SplatUndef, + unsigned &SplatBitSize, + bool &HasAnyUndefs, + unsigned MinSplatBits) { + MVT VT = getValueType(0); + assert(VT.isVector() && "Expected a vector type"); + unsigned sz = VT.getSizeInBits(); + if (MinSplatBits > sz) + return false; + + SplatValue = APInt(sz, 0); + SplatUndef = APInt(sz, 0); + + // Get the bits. Bits with undefined values (when the corresponding element + // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared + // in SplatValue. If any of the values are not constant, give up and return + // false. + unsigned int nOps = getNumOperands(); + assert(nOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); + for (unsigned i = 0; i < nOps; ++i) { + SDValue OpVal = getOperand(i); + unsigned BitPos = i * EltBitSize; + + if (OpVal.getOpcode() == ISD::UNDEF) + SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos +EltBitSize); + else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) + SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). + zextOrTrunc(sz) << BitPos); + else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) + SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; + else + return false; + } + + // The build_vector is all constants or undefs. Find the smallest element + // size that splats the vector. + + HasAnyUndefs = (SplatUndef != 0); + while (sz > 8) { + + unsigned HalfSize = sz / 2; + APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize); + APInt LowValue = APInt(SplatValue).trunc(HalfSize); + APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = APInt(SplatUndef).trunc(HalfSize); + + // If the two halves do not match (ignoring undef bits), stop here. 
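    // (Added worked example, not from the original source: a <16 x i8>
    // build_vector with every element 0x01 enters this loop with sz == 128
    // and SplatValue 0x0101...01; on each iteration the two halves agree,
    // so sz keeps halving until it reaches 8 and the node is reported as an
    // 8-bit splat of 0x01. Elements that are ISD::UNDEF only set bits in
    // SplatUndef, which the masking below excludes from the comparison.)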
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || + MinSplatBits > HalfSize) + break; + + SplatValue = HighValue | LowValue; + SplatUndef = HighUndef & LowUndef; + + sz = HalfSize; + } + + SplatBitSize = sz; + return true; +} + +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) { + // Find the first non-undef value in the shuffle mask. + unsigned i, e; + for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) + /* search */; + + assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); + + // Make sure all remaining elements are either undef or the same as the first + // non-undef value. + for (int Idx = Mask[i]; i != e; ++i) + if (Mask[i] >= 0 && Mask[i] != Idx) + return false; + return true; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp new file mode 100644 index 000000000000..889d7f5dd934 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -0,0 +1,6052 @@ +//===-- SelectionDAGBuild.cpp - Selection-DAG building --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating from LLVM IR into SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "SelectionDAGBuild.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Constants.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; + +/// LimitFloatPrecision - Generate low-precision inline sequences for +/// some float libcalls (6, 8 or 12 bits). 
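///
/// Added usage note, not from the original source: the variable is tied
/// below to the cl::opt named "limit-float-precision", so any tool that
/// links this library and parses command-line options can set it; a
/// hypothetical invocation would be "llc -limit-float-precision=12
/// input.bc". The default of 0 presumably leaves the full-precision
/// libcall lowering in place.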
+static unsigned LimitFloatPrecision; + +static cl::opt<unsigned, true> +LimitFPPrecision("limit-float-precision", + cl::desc("Generate low-precision inline sequences " + "for some float libcalls"), + cl::location(LimitFloatPrecision), + cl::init(0)); + +/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence +/// of insertvalue or extractvalue indices that identify a member, return +/// the linearized index of the start of the member. +/// +static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, + const unsigned *Indices, + const unsigned *IndicesEnd, + unsigned CurIndex = 0) { + // Base case: We're done. + if (Indices && Indices == IndicesEnd) + return CurIndex; + + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) { + if (Indices && *Indices == unsigned(EI - EB)) + return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); + } + return CurIndex; + } + // Given an array type, recursively traverse the elements. + else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + if (Indices && *Indices == i) + return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); + } + return CurIndex; + } + // We haven't found the type we're looking for, so keep searching. + return CurIndex + 1; +} + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// MVTs that represent all the individual underlying +/// non-aggregate types that comprise it. +/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. +/// +static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, + SmallVectorImpl<MVT> &ValueVTs, + SmallVectorImpl<uint64_t> *Offsets = 0, + uint64_t StartingOffset = 0) { + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) + ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + StartingOffset + SL->getElementOffset(EI - EB)); + return; + } + // Given an array type, recursively traverse the elements. + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty == Type::VoidTy) + return; + // Base case: we can get an MVT for this LLVM IR type. + ValueVTs.push_back(TLI.getValueType(Ty)); + if (Offsets) + Offsets->push_back(StartingOffset); +} + +namespace llvm { + /// RegsForValue - This struct represents the registers (physical or virtual) + /// that a particular set of values is assigned, and the type information about + /// the value. 
The most common situation is to represent one value at a time, + /// but struct or array values are handled element-wise as multiple values. + /// The splitting of aggregates is performed recursively, so that we never + /// have aggregate-typed registers. The values at this point do not necessarily + /// have legal types, so each value may require one or more registers of some + /// legal type. + /// + struct VISIBILITY_HIDDEN RegsForValue { + /// TLI - The TargetLowering object. + /// + const TargetLowering *TLI; + + /// ValueVTs - The value types of the values, which may not be legal, and + /// may need be promoted or synthesized from one or more registers. + /// + SmallVector<MVT, 4> ValueVTs; + + /// RegVTs - The value types of the registers. This is the same size as + /// ValueVTs and it records, for each value, what the type of the assigned + /// register or registers are. (Individual values are never synthesized + /// from more than one type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function, however when with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector<MVT, 4> RegVTs; + + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. + /// + SmallVector<unsigned, 4> Regs; + + RegsForValue() : TLI(0) {} + + RegsForValue(const TargetLowering &tli, + const SmallVector<unsigned, 4> ®s, + MVT regvt, MVT valuevt) + : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + RegsForValue(const TargetLowering &tli, + const SmallVector<unsigned, 4> ®s, + const SmallVector<MVT, 4> ®vts, + const SmallVector<MVT, 4> &valuevts) + : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} + RegsForValue(const TargetLowering &tli, + unsigned Reg, const Type *Ty) : TLI(&tli) { + ComputeValueVTs(tli, Ty, ValueVTs); + + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(ValueVT); + MVT RegisterVT = TLI->getRegisterType(ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } + } + + /// append - Add the specified values to this one. + void append(const RegsForValue &RHS) { + TLI = RHS.TLI; + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + } + + + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVTs value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the + /// specified value into the registers specified by this object. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// AddInlineAsmOperands - Add this value to the specified inlineasm node + /// operand list. 
This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it. + void AddInlineAsmOperands(unsigned Code, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, std::vector<SDValue> &Ops) const; + }; +} + +/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by +/// PHI nodes or outside of the basic block that defines it, or used by a +/// switch or atomic instruction, which may expand to multiple basic blocks. +static bool isUsedOutsideOfDefiningBlock(Instruction *I) { + if (isa<PHINode>(I)) return true; + BasicBlock *BB = I->getParent(); + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI)) + return true; + return false; +} + +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + // Don't force virtual registers for byval arguments though, because + // fast-isel can't handle those in all cases. + if (EnableFastISel && !A->hasByValAttr()) + return A->use_empty(); + + BasicBlock *Entry = A->getParent()->begin(); + for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI)) + return false; // Use not in entry block. + return true; +} + +FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli) + : TLI(tli) { +} + +void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, + SelectionDAG &DAG, + bool EnableFastISel) { + Fn = &fn; + MF = &mf; + RegInfo = &MF->getRegInfo(); + + // Create a vreg for each argument register that is not dead and is used + // outside of the entry block for the function. + for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); + AI != E; ++AI) + if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) + InitializeRegForValue(AI); + + // Initialize the mapping of values to registers. This is only set up for + // instruction values that are used outside of the block that defines + // them. + Function::iterator BB = Fn->begin(), EB = Fn->end(); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { + const Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align); + } + + for (; BB != EB; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) + if (!isa<AllocaInst>(I) || + !StaticAllocaMap.count(cast<AllocaInst>(I))) + InitializeRegForValue(I); + + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This + // also creates the initial PHI MachineInstrs, though none of the input + // operands are populated. 
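  // (Added illustration, not from the original source: assuming a target
  // where an i64 value needs two registers, an i64 PHI that is used outside
  // its block gets NumRegisters == 2 below, so two TargetInstrInfo::PHI
  // machine instructions are created, defining PHIReg and PHIReg+1; their
  // incoming operands are filled in later, as noted above.)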
+ for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); + MBBMap[BB] = MBB; + MF->push_back(MBB); + + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as + // appropriate. + PHINode *PN; + DebugLoc DL; + for (BasicBlock::iterator + I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (Function *F = CI->getCalledFunction()) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); + + if (DIDescriptor::ValidDebugInfo(SPI->getContext(), + CodeGenOpt::Default)) { + DICompileUnit CU(cast<GlobalVariable>(SPI->getContext())); + unsigned idx = MF->getOrCreateDebugLocID(CU.getGV(), + SPI->getLine(), + SPI->getColumn()); + DL = DebugLoc::get(idx); + } + + break; + } + case Intrinsic::dbg_func_start: { + DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); + Value *SP = FSI->getSubprogram(); + + if (DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::Default)) { + DISubprogram Subprogram(cast<GlobalVariable>(SP)); + DICompileUnit CU(Subprogram.getCompileUnit()); + unsigned Line = Subprogram.getLineNumber(); + DL = DebugLoc::get(MF->getOrCreateDebugLocID(CU.getGV(), + Line, 0)); + } + + break; + } + } + } + } + + PN = dyn_cast<PHINode>(I); + if (!PN || PN->use_empty()) continue; + + unsigned PHIReg = ValueMap[PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + MVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(VT); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + for (unsigned i = 0; i != NumRegisters; ++i) + BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); + PHIReg += NumRegisters; + } + } + } +} + +unsigned FunctionLoweringInfo::MakeReg(MVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +/// CreateRegForValue - Allocate the appropriate number of virtual registers of +/// the correctly promoted or expanded types. Assign these registers +/// consecutive vreg numbers and return the first assigned number. +/// +/// In the case that the given value has struct or array type, this function +/// will assign registers for each member or element. +/// +unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, V->getType(), ValueVTs); + + unsigned FirstReg = 0; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT ValueVT = ValueVTs[Value]; + MVT RegisterVT = TLI.getRegisterType(ValueVT); + + unsigned NumRegs = TLI.getNumRegisters(ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) { + unsigned R = MakeReg(RegisterVT); + if (!FirstReg) FirstReg = R; + } + } + return FirstReg; +} + +/// getCopyFromParts - Create a value that contains the specified legal parts +/// combined into the value they represent. If the parts combine to a type +/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext). 
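///
/// An added example, not from the original source: assembling an i64 value
/// from two i32 parts on a little-endian target takes Parts[0] as the low
/// half and Parts[1] as the high half and joins them with ISD::BUILD_PAIR;
/// passing ISD::AssertZext as AssertOp instead marks a combined value that
/// is wider than ValueVT as having zero upper bits before the final
/// TRUNCATE back down to ValueVT.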
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, + const SDValue *Parts, + unsigned NumParts, MVT PartVT, MVT ValueVT, + ISD::NodeType AssertOp = ISD::DELETED_NODE) { + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + if (NumParts > 1) { + // Assemble the value from multiple parts. + if (!ValueVT.isVector() && ValueVT.isInteger()) { + unsigned PartBits = PartVT.getSizeInBits(); + unsigned ValueBits = ValueVT.getSizeInBits(); + + // Assemble the power of 2 part. + unsigned RoundParts = NumParts & (NumParts - 1) ? + 1 << Log2_32(NumParts) : NumParts; + unsigned RoundBits = PartBits * RoundParts; + MVT RoundVT = RoundBits == ValueBits ? + ValueVT : MVT::getIntegerVT(RoundBits); + SDValue Lo, Hi; + + MVT HalfVT = MVT::getIntegerVT(RoundBits/2); + + if (RoundParts > 2) { + Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT); + Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2, + PartVT, HalfVT); + } else { + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]); + } + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi); + + if (RoundParts < NumParts) { + // Assemble the trailing non-power-of-2 part. + unsigned OddParts = NumParts - RoundParts; + MVT OddVT = MVT::getIntegerVT(OddParts * PartBits); + Hi = getCopyFromParts(DAG, dl, + Parts+RoundParts, OddParts, PartVT, OddVT); + + // Combine the round and odd parts. + Lo = Val; + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + MVT TotalVT = MVT::getIntegerVT(NumParts * PartBits); + Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); + Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, + DAG.getConstant(Lo.getValueType().getSizeInBits(), + TLI.getPointerTy())); + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi); + } + } else if (ValueVT.isVector()) { + // Handle a multi-element vector. + MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = + TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + assert(RegisterVT == Parts[0].getValueType() && + "Part type doesn't match part!"); + + // Assemble the parts into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1, + PartVT, IntermediateVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate operands + // from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor, + PartVT, IntermediateVT); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate + // operands. + Val = DAG.getNode(IntermediateVT.isVector() ? 
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl, + ValueVT, &Ops[0], NumIntermediates); + } else if (PartVT.isFloatingPoint()) { + // FP split into multiple FP parts (for ppcf128) + assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) && + "Unexpected split"); + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); + } else { + // FP split into integer parts (soft fp) + assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && + !PartVT.isVector() && "Unexpected split"); + MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits()); + Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); + } + } + + // There is now one part, held in Val. Correct it to match ValueVT. + PartVT = Val.getValueType(); + + if (PartVT == ValueVT) + return Val; + + if (PartVT.isVector()) { + assert(ValueVT.isVector() && "Unknown vector conversion!"); + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); + } + + if (ValueVT.isVector()) { + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial scalar-to-vector conversions should get here!"); + return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); + } + + if (PartVT.isInteger() && + ValueVT.isInteger()) { + if (ValueVT.bitsLT(PartVT)) { + // For a truncate, see if we have any information to + // indicate whether the truncated bits will always be + // zero or sign-extension. + if (AssertOp != ISD::DELETED_NODE) + Val = DAG.getNode(AssertOp, dl, PartVT, Val, + DAG.getValueType(ValueVT)); + return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } else { + return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); + } + } + + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + if (ValueVT.bitsLT(Val.getValueType())) + // FP_ROUND's are always exact here. + return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, + DAG.getIntPtrConstant(1)); + return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); + } + + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); + + assert(0 && "Unknown mismatch!"); + return SDValue(); +} + +/// getCopyToParts - Create a series of nodes that contain the specified value +/// split into legal parts. If the parts contain more bits than Val, then, for +/// integers, ExtendKind can be used to specify how to generate the extra bits. +static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, + SDValue *Parts, unsigned NumParts, MVT PartVT, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT PtrVT = TLI.getPointerTy(); + MVT ValueVT = Val.getValueType(); + unsigned PartBits = PartVT.getSizeInBits(); + unsigned OrigNumParts = NumParts; + assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + + if (!NumParts) + return; + + if (!ValueVT.isVector()) { + if (PartVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } + + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. 
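      // (Added example, not from the original source: copying an i16 value
      // into a single i32 part takes this path; the value is widened with
      // ExtendKind, which is ANY_EXTEND unless the caller asked for sign or
      // zero extension, so that it exactly tiles the parts.)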
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); + } else if (PartVT.isInteger() && ValueVT.isInteger()) { + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); + } else { + assert(0 && "Unknown mismatch!"); + } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartVT != ValueVT); + Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover less bits than value has, truncate the value. + if (PartVT.isInteger() && ValueVT.isInteger()) { + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } else { + assert(0 && "Unknown mismatch!"); + } + } + + // The value may have changed - recompute ValueVT. + ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); + + if (NumParts == 1) { + assert(PartVT == ValueVT && "Type conversion failed!"); + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val, + DAG.getConstant(RoundBits, + TLI.getPointerTy())); + getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT); + if (TLI.isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + NumParts = RoundParts; + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. + Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::getIntegerVT(ValueVT.getSizeInBits()), + Val); + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + MVT ThisVT = MVT::getIntegerVT (ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + ThisVT, Part0, + DAG.getConstant(1, PtrVT)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + ThisVT, Part0, + DAG.getConstant(0, PtrVT)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, + PartVT, Part0); + Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, + PartVT, Part1); + } + } + } + + if (TLI.isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); + + return; + } + + // Vector ValueVT. + if (NumParts == 1) { + if (PartVT != ValueVT) { + if (PartVT.isVector()) { + Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); + } else { + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + PartVT, Val, + DAG.getConstant(0, PtrVT)); + } + } + + Parts[0] = Val; + return; + } + + // Handle a multi-element vector. 
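  // (Added illustration, not from the original source: assuming a target
  // whose widest legal vector type here is v2f32, a v4f32 value gets
  // IntermediateVT == v2f32 and NumIntermediates == 2 from
  // getVectorTypeBreakdown, and the loop below slices it with
  // EXTRACT_SUBVECTOR at element offsets 0 and 2 before each piece is
  // copied out as parts.)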
+ MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = TLI + .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + unsigned NumElements = ValueVT.getVectorNumElements(); + + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + + // Split the vector into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + for (unsigned i = 0; i != NumIntermediates; ++i) + if (IntermediateVT.isVector()) + Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + IntermediateVT, Val, + DAG.getConstant(i * (NumElements / NumIntermediates), + PtrVT)); + else + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + IntermediateVT, Val, + DAG.getConstant(i, PtrVT)); + + // Split the intermediate operands into legal parts. + if (NumParts == NumIntermediates) { + // If the register was not expanded, promote or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, split each the value into + // legal parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT); + } +} + + +void SelectionDAGLowering::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { + AA = &aa; + GFI = gfi; + TD = DAG.getTarget().getTargetData(); +} + +/// clear - Clear out the curret SelectionDAG and the associated +/// state and prepare this SelectionDAGLowering object to be used +/// for a new block. This doesn't clear out information about +/// additional blocks that are needed to complete switch lowering +/// or PHI node updating; that information is cleared out as it is +/// consumed. +void SelectionDAGLowering::clear() { + NodeMap.clear(); + PendingLoads.clear(); + PendingExports.clear(); + DAG.clear(); + CurDebugLoc = DebugLoc::getUnknownLoc(); +} + +/// getRoot - Return the current virtual root of the Selection DAG, +/// flushing any PendingLoad items. This must be done before emitting +/// a store or any other node that may need to be ordered after any +/// prior load instructions. +/// +SDValue SelectionDAGLowering::getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); + + if (PendingLoads.size() == 1) { + SDValue Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } + + // Otherwise, we have to make a token factor node. + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingLoads[0], PendingLoads.size()); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; +} + +/// getControlRoot - Similar to getRoot, but instead of flushing all the +/// PendingLoad items, flush all the PendingExports items. It is necessary +/// to do this before emitting a terminator instruction. +/// +SDValue SelectionDAGLowering::getControlRoot() { + SDValue Root = DAG.getRoot(); + + if (PendingExports.empty()) + return Root; + + // Turn all of the CopyToReg chains into one factored node. 
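  // (Added note, not from the original source: if, say, two CopyToReg
  // exports are pending and neither already has the current root as its
  // chain operand, the root is appended as one more operand below, so the
  // resulting TokenFactor orders the terminator after both copies and the
  // previous root.)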
+ if (Root.getOpcode() != ISD::EntryToken) { + unsigned i = 0, e = PendingExports.size(); + for (; i != e; ++i) { + assert(PendingExports[i].getNode()->getNumOperands() > 1); + if (PendingExports[i].getNode()->getOperand(0) == Root) + break; // Don't add the root if we already indirectly depend on it. + } + + if (i == e) + PendingExports.push_back(Root); + } + + Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingExports[0], + PendingExports.size()); + PendingExports.clear(); + DAG.setRoot(Root); + return Root; +} + +void SelectionDAGLowering::visit(Instruction &I) { + visit(I.getOpcode(), I); +} + +void SelectionDAGLowering::visit(unsigned Opcode, User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. + switch (Opcode) { + default: assert(0 && "Unknown instruction type encountered!"); + abort(); + // Build the switch statement using the Instruction.def file. +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE:return visit##OPCODE((CLASS&)I); +#include "llvm/Instruction.def" + } +} + +void SelectionDAGLowering::visitAdd(User &I) { + if (I.getType()->isFPOrFPVector()) + visitBinary(I, ISD::FADD); + else + visitBinary(I, ISD::ADD); +} + +void SelectionDAGLowering::visitMul(User &I) { + if (I.getType()->isFPOrFPVector()) + visitBinary(I, ISD::FMUL); + else + visitBinary(I, ISD::MUL); +} + +SDValue SelectionDAGLowering::getValue(const Value *V) { + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { + MVT VT = TLI.getValueType(V->getType(), true); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + return N = DAG.getConstant(*CI, VT); + + if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return N = DAG.getGlobalAddress(GV, VT); + + if (isa<ConstantPointerNull>(C)) + return N = DAG.getConstant(0, TLI.getPointerTy()); + + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + return N = DAG.getConstantFP(*CFP, VT); + + if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) + return N = DAG.getUNDEF(VT); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + visit(CE->getOpcode(), *CE); + SDValue N1 = NodeMap[V]; + assert(N1.getNode() && "visit didn't populate the ValueMap!"); + return N1; + } + + if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { + SmallVector<SDValue, 4> Constants; + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) { + SDNode *Val = getValue(*OI).getNode(); + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Constants.push_back(SDValue(Val, i)); + } + return DAG.getMergeValues(&Constants[0], Constants.size(), + getCurDebugLoc()); + } + + if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) { + assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && + "Unknown struct or array constant!"); + + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, C->getType(), ValueVTs); + unsigned NumElts = ValueVTs.size(); + if (NumElts == 0) + return SDValue(); // empty struct + SmallVector<SDValue, 4> Constants(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + MVT EltVT = ValueVTs[i]; + if (isa<UndefValue>(C)) + Constants[i] = DAG.getUNDEF(EltVT); + else if (EltVT.isFloatingPoint()) + Constants[i] = DAG.getConstantFP(0, EltVT); + else + Constants[i] = DAG.getConstant(0, EltVT); + } + return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc()); + } + + const VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned 
NumElements = VecTy->getNumElements(); + + // Now that we know the number and type of the elements, get that number of + // elements into the Ops array based on what kind of constant it is. + SmallVector<SDValue, 16> Ops; + if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + for (unsigned i = 0; i != NumElements; ++i) + Ops.push_back(getValue(CP->getOperand(i))); + } else { + assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); + MVT EltVT = TLI.getValueType(VecTy->getElementType()); + + SDValue Op; + if (EltVT.isFloatingPoint()) + Op = DAG.getConstantFP(0, EltVT); + else + Op = DAG.getConstant(0, EltVT); + Ops.assign(NumElements, Op); + } + + // Create a BUILD_VECTOR node. + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } + + // If this is a static alloca, generate it as the frameindex instead of + // computation. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + } + + unsigned InReg = FuncInfo.ValueMap[V]; + assert(InReg && "Value not in map!"); + + RegsForValue RFV(TLI, InReg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); +} + + +void SelectionDAGLowering::visitRet(ReturnInst &I) { + if (I.getNumOperands() == 0) { + DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), + MVT::Other, getControlRoot())); + return; + } + + SmallVector<SDValue, 8> NewValues; + NewValues.push_back(getControlRoot()); + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) continue; + + SDValue RetOp = getValue(I.getOperand(i)); + for (unsigned j = 0, f = NumValues; j != f; ++j) { + MVT VT = ValueVTs[j]; + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + const Function *F = I.getParent()->getParent(); + if (F->paramHasAttr(0, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->paramHasAttr(0, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + MVT MinVT = TLI.getRegisterType(MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(VT); + MVT PartVT = TLI.getRegisterType(VT); + SmallVector<SDValue, 4> Parts(NumParts); + getCopyToParts(DAG, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, ExtendKind); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (F->paramHasAttr(0, Attribute::InReg)) + Flags.setInReg(); + for (unsigned i = 0; i < NumParts; ++i) { + NewValues.push_back(Parts[i]); + NewValues.push_back(DAG.getArgFlags(Flags)); + } + } + } + DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), MVT::Other, + &NewValues[0], NewValues.size())); +} + +/// CopyToExportRegsIfNeeded - If the given value has virtual registers +/// created for it, emit nodes to copy the value into the virtual +/// registers. 
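The getCopyToParts call in visitRet above breaks each return value into however many legal register-sized pieces the target requires. As a rough standalone sketch, not part of this commit and assuming a hypothetical little-endian target whose widest legal integer register is 32 bits, an i64 return value would be decomposed like this:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t value = 0x1122334455667788ULL;   // an i64 return value
      uint32_t parts[2];                        // two i32 register-sized parts
      parts[0] = uint32_t(value);               // low part first on this hypothetical target
      parts[1] = uint32_t(value >> 32);         // high part
      std::printf("%#x %#x\n", (unsigned)parts[0], (unsigned)parts[1]);  // 0x55667788 0x11223344
      return 0;
    }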
+void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) { + if (!V->use_empty()) { + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) + CopyValueToVirtualRegister(V, VMI->second); + } +} + +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) { + // No need to export constants. + if (!isa<Instruction>(V) && !isa<Argument>(V)) return; + + // Already exported? + if (FuncInfo.isExportedInst(V)) return; + + unsigned Reg = FuncInfo.InitializeRegForValue(V); + CopyValueToVirtualRegister(V, Reg); +} + +bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. + if (Instruction *VI = dyn_cast<Instruction>(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; + + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } + + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa<Argument>(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; + + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } + + // Otherwise, constants can always be exported. + return true; +} + +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == BB; + return true; +} + +/// getFCmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR floating-point condition code. This includes +/// consideration of global floating-point math flags. +/// +static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) { + ISD::CondCode FPC, FOC; + switch (Pred) { + case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; + case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; + case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; + case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; + case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; + case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; + case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; + case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; + case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; + case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; + case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; + case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; + case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; + case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; + case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; + case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; + default: + assert(0 && "Invalid FCmp predicate opcode!"); + FOC = FPC = ISD::SETFALSE; + break; + } + if (FiniteOnlyFPMath()) + return FOC; + else + return FPC; +} + +/// getICmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR integer condition code. 
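The FOC/FPC pair in getFCmpCondCode above separates ordered condition codes, which are false whenever either operand is NaN, from their unordered counterparts, which are true in that case; FiniteOnlyFPMath() merely licenses the cheaper ordered form. A small standalone illustration of the difference, not taken from this patch:

    #include <cmath>
    #include <cstdio>

    // SETOLT: ordered and less-than.
    static bool fcmp_olt(double a, double b) {
      return !std::isnan(a) && !std::isnan(b) && a < b;
    }

    // SETULT: unordered or less-than.
    static bool fcmp_ult(double a, double b) {
      return std::isnan(a) || std::isnan(b) || a < b;
    }

    int main() {
      double nan = std::nan("");
      std::printf("olt(1,2)=%d ult(1,2)=%d\n", fcmp_olt(1, 2), fcmp_ult(1, 2));         // 1 1
      std::printf("olt(nan,2)=%d ult(nan,2)=%d\n", fcmp_olt(nan, 2), fcmp_ult(nan, 2)); // 0 1
      return 0;
    }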
+/// +static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) { + switch (Pred) { + case ICmpInst::ICMP_EQ: return ISD::SETEQ; + case ICmpInst::ICMP_NE: return ISD::SETNE; + case ICmpInst::ICMP_SLE: return ISD::SETLE; + case ICmpInst::ICMP_ULE: return ISD::SETULE; + case ICmpInst::ICMP_SGE: return ISD::SETGE; + case ICmpInst::ICMP_UGE: return ISD::SETUGE; + case ICmpInst::ICMP_SLT: return ISD::SETLT; + case ICmpInst::ICMP_ULT: return ISD::SETULT; + case ICmpInst::ICMP_SGT: return ISD::SETGT; + case ICmpInst::ICMP_UGT: return ISD::SETUGT; + default: + assert(0 && "Invalid ICmp predicate opcode!"); + return ISD::SETNE; + } +} + +/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. +/// This function emits a branch and is used at the leaves of an OR or an +/// AND operator tree. +/// +void +SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB) { + const BasicBlock *BB = CurBB->getBasicBlock(); + + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + // The operands of the cmp have to be in this block. We don't know + // how to export them from some other block. If this is the first block + // of the sequence, no exporting is needed. + if (CurBB == CurMBB || + (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && + isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { + ISD::CondCode Condition; + if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + Condition = getICmpCondCode(IC->getPredicate()); + } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { + Condition = getFCmpCondCode(FC->getPredicate()); + } else { + Condition = ISD::SETEQ; // silence warning. + assert(0 && "Unknown compare instruction"); + } + + CaseBlock CB(Condition, BOp->getOperand(0), + BOp->getOperand(1), NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(), + NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); +} + +/// FindMergedConditions - If Cond is an expression like +void SelectionDAGLowering::FindMergedConditions(Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + unsigned Opc) { + // If this node is not part of the or/and tree, emit it as a branch. + Instruction *BOp = dyn_cast<Instruction>(Cond); + if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || + (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOp->getParent() != CurBB->getBasicBlock() || + !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || + !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB); + return; + } + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI = CurBB; + MachineFunction &MF = DAG.getMachineFunction(); + MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc); + + // Emit the RHS condition into TmpBB. 
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc); + + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + } +} + +/// If the set of cases should be emitted as a series of branches, return true. +/// If we should emit this as a bunch of and/or'd together conditions, return +/// false. +bool +SelectionDAGLowering::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ + if (Cases.size() != 2) return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. + if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + return true; +} + +void SelectionDAGLowering::visitBr(BranchInst &I) { + // Update machine-CFG edges. + MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + if (I.isUnconditional()) { + // Update machine-CFG edges. + CurMBB->addSuccessor(Succ0MBB); + + // If this is not a fall-through branch, emit the branch. + if (Succ0MBB != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Succ0MBB))); + return; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + // + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { + if (BOp->hasOneUse() && + (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode()); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SwitchCases)) { + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + } + + // Emit the branch for this block. + visitSwitchCase(SwitchCases[0]); + SwitchCases.erase(SwitchCases.begin()); + return; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. 
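The rewrite described in the visitBr comment above can be restated in plain C++; the following standalone sketch, illustrative only and not from the patch, checks that branching on each comparison separately is equivalent to materializing the OR of the two setcc results:

    #include <cassert>

    static bool materialized(int a, int b, int d, int e) {
      bool c = (a == b);
      bool f = (d <= e);
      return c | f;                 // or C, F ; jnz foo
    }

    static bool asBranches(int a, int b, int d, int e) {
      if (a == b) return true;      // cmp A, B ; je foo
      if (d <= e) return true;      // cmp D, E ; jle foo
      return false;
    }

    int main() {
      for (int a = 0; a < 3; ++a)
        for (int d = 0; d < 3; ++d)
          assert(materialized(a, 1, d, 1) == asBranches(a, 1, d, 1));
      return 0;
    }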
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) + CurMBB->getParent()->erase(SwitchCases[i].ThisBB); + + SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(), + NULL, Succ0MBB, Succ1MBB, CurMBB); + // Use visitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + visitSwitchCase(CB); +} + +/// visitSwitchCase - Emits the necessary code to represent a single node in +/// the binary search tree resulting from lowering a switch instruction. +void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { + SDValue Cond; + SDValue CondLHS = getValue(CB.CmpLHS); + DebugLoc dl = getCurDebugLoc(); + + // Build the setcc now. + if (CB.CmpMHS == NULL) { + // Fold "(X == true)" to X and "(X == false)" to !X to + // handle common cases produced by branch lowering. + if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ) + Cond = CondLHS; + else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) { + SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); + } else + Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); + } else { + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); + + const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + + SDValue CmpOp = getValue(CB.CmpMHS); + MVT VT = CmpOp.getValueType(); + + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + ISD::SETLE); + } else { + SDValue SUB = DAG.getNode(ISD::SUB, dl, + VT, CmpOp, DAG.getConstant(Low, VT)); + Cond = DAG.getSetCC(dl, MVT::i1, SUB, + DAG.getConstant(High-Low, VT), ISD::SETULE); + } + } + + // Update successor info + CurMBB->addSuccessor(CB.TrueBB); + CurMBB->addSuccessor(CB.FalseBB); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + // If the lhs block is the next block, invert the condition so that we can + // fall through to the lhs instead of the rhs block. + if (CB.TrueBB == NextBlock) { + std::swap(CB.TrueBB, CB.FalseBB); + SDValue True = DAG.getConstant(1, Cond.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); + } + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(CB.TrueBB)); + + // If the branch was constant folded, fix up the CFG. + if (BrCond.getOpcode() == ISD::BR) { + CurMBB->removeSuccessor(CB.FalseBB); + DAG.setRoot(BrCond); + } else { + // Otherwise, go ahead and insert the false branch. 
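The CmpMHS path of visitSwitchCase above collapses a two-sided range test into a single unsigned comparison after a subtraction. A minimal standalone sketch of the identity it relies on, not part of this commit and with made-up helper names:

    #include <cassert>
    #include <cstdint>

    static bool inRangeTwoCompares(int32_t x, int32_t lo, int32_t hi) {
      return lo <= x && x <= hi;
    }

    static bool inRangeOneCompare(int32_t x, int32_t lo, int32_t hi) {
      // SUB followed by SETULE, the same shape emitted above.
      return uint32_t(x) - uint32_t(lo) <= uint32_t(hi) - uint32_t(lo);
    }

    int main() {
      for (int32_t x = -20; x <= 20; ++x)
        assert(inRangeTwoCompares(x, -3, 4) == inRangeOneCompare(x, -3, 4));
      return 0;
    }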
+ if (BrCond == getControlRoot()) + CurMBB->removeSuccessor(CB.TrueBB); + + if (CB.FalseBB == NextBlock) + DAG.setRoot(BrCond); + else + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB))); + } +} + +/// visitJumpTable - Emit JumpTable node in the current MBB +void SelectionDAGLowering::visitJumpTable(JumpTable &JT) { + // Emit the code for the jump table + assert(JT.Reg != -1U && "Should lower JT Header first!"); + MVT PTy = TLI.getPointerTy(); + SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + JT.Reg, PTy); + SDValue Table = DAG.getJumpTable(JT.JTI, PTy); + DAG.setRoot(DAG.getNode(ISD::BR_JT, getCurDebugLoc(), + MVT::Other, Index.getValue(1), + Table, Index)); +} + +/// visitJumpTableHeader - This function emits the code necessary to produce the index +/// into the jump table from the switch case value. +void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, + JumpTableHeader &JTH) { + // Subtract the lowest switch case value from the value being switched on and + // conditionally branch to the default MBB if the result is greater than the + // difference between smallest and largest cases. + SDValue SwitchOp = getValue(JTH.SValue); + MVT VT = SwitchOp.getValueType(); + SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(JTH.First, VT)); + + // The SDNode we just created, which holds the value being switched on minus + // the smallest case value, needs to be copied to a virtual register so it + // can be used as an index into the jump table in a subsequent basic block. + // This value may be smaller or larger than the target's pointer type, and + // therefore require extension or truncation. + if (VT.bitsGT(TLI.getPointerTy())) + SwitchOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + else + SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + + unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + JumpTableReg, SwitchOp); + JT.Reg = JumpTableReg; + + // Emit the range check for the jump table, and branch to the default block + // for the switch statement if the value being switched on exceeds the largest + // case in the switch. + SDValue CMP = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(SUB.getValueType()), SUB, + DAG.getConstant(JTH.Last-JTH.First,VT), + ISD::SETUGT); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block.
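Taken together, visitJumpTableHeader and visitJumpTable emit the classic table dispatch: subtract the smallest case value, do one unsigned range check that falls back to the default block, then branch indirectly through the table. A standalone C++ sketch of that shape, with hypothetical case values and not taken from the patch:

    #include <cstdio>

    static const char *handleCase(int x) {
      static const char *const table[] = {"case 10", "case 11", "case 12", "case 13"};
      const int First = 10, Last = 13;
      unsigned idx = unsigned(x - First);        // SUB, then zext/trunc to pointer width
      if (idx > unsigned(Last - First))          // SETUGT -> branch to the default block
        return "default";
      return table[idx];                         // BR_JT: indirect branch through the table
    }

    int main() {
      std::printf("%s %s %s\n", handleCase(9), handleCase(12), handleCase(40));
      // Prints: default case 12 default
      return 0;
    }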
+ MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, CMP, + DAG.getBasicBlock(JT.Default)); + + if (JT.MBB == NextBlock) + DAG.setRoot(BrCond); + else + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, + DAG.getBasicBlock(JT.MBB))); +} + +/// visitBitTestHeader - This function emits necessary code to produce value +/// suitable for "bit tests" +void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { + // Subtract the minimum value + SDValue SwitchOp = getValue(B.SValue); + MVT VT = SwitchOp.getValueType(); + SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(B.First, VT)); + + // Check range + SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(SUB.getValueType()), + SUB, DAG.getConstant(B.Range, VT), + ISD::SETUGT); + + SDValue ShiftOp; + if (VT.bitsGT(TLI.getPointerTy())) + ShiftOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + else + ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + + B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + B.Reg, ShiftOp); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + MachineBasicBlock* MBB = B.Cases[0].ThisBB; + + CurMBB->addSuccessor(B.Default); + CurMBB->addSuccessor(MBB); + + SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, RangeCmp, + DAG.getBasicBlock(B.Default)); + + if (MBB == NextBlock) + DAG.setRoot(BrRange); + else + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, + DAG.getBasicBlock(MBB))); +} + +/// visitBitTestCase - this function produces one "bit test" +void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B) { + // Make desired shift + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, + TLI.getPointerTy()); + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(1, TLI.getPointerTy()), + ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + TLI.getPointerTy(), SwitchVal, + DAG.getConstant(B.Mask, TLI.getPointerTy())); + SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(AndOp.getValueType()), + AndOp, DAG.getConstant(0, TLI.getPointerTy()), + ISD::SETNE); + + CurMBB->addSuccessor(B.TargetBB); + CurMBB->addSuccessor(NextMBB); + + SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + AndCmp, DAG.getBasicBlock(B.TargetBB)); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. 
+ MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + if (NextMBB == NextBlock) + DAG.setRoot(BrAnd); + else + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, + DAG.getBasicBlock(NextMBB))); +} + +void SelectionDAGLowering::visitInvoke(InvokeInst &I) { + // Retrieve successors. + MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; + MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + + const Value *Callee(I.getCalledValue()); + if (isa<InlineAsm>(Callee)) + visitInlineAsm(&I); + else + LowerCallTo(&I, getValue(Callee), false, LandingPad); + + // If the value of the invoke is used outside of its defining block, make it + // available as a virtual register. + CopyToExportRegsIfNeeded(&I); + + // Update successor info + CurMBB->addSuccessor(Return); + CurMBB->addSuccessor(LandingPad); + + // Drop into normal successor. + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); +} + +void SelectionDAGLowering::visitUnwind(UnwindInst &I) { +} + +/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for +/// small case ranges). +bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { + Case& BackCase = *(CR.Range.second-1); + + // Size is the number of Cases represented by this range. + size_t Size = CR.Range.second - CR.Range.first; + if (Size > 3) + return false; + + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CR.CaseBB; + + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + // TODO: If any two of the cases has the same destination, and if one value + // is the same as the other, but has one bit unset that the other has set, + // use bit manipulation to do two compares at once. For example: + // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + + // Rearrange the case blocks so that the last one falls through if possible. + if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { + // The last case block won't fall through into 'NextBlock' if we emit the + // branches in this order. See if rearranging a case value would help. + for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) { + if (I->BB == NextBlock) { + std::swap(*I, BackCase); + break; + } + } + } + + // Create a CaseBlock record representing a conditional branch to + // the Case's target mbb if the value being switched on SV is equal + // to C. + MachineBasicBlock *CurBlock = CR.CaseBB; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + MachineBasicBlock *FallThrough; + if (I != E-1) { + FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); + CurMF->insert(BBI, FallThrough); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } else { + // If the last case doesn't match, go to the default block. 
+ FallThrough = Default; + } + + Value *RHS, *LHS, *MHS; + ISD::CondCode CC; + if (I->High == I->Low) { + // This is just small small case range :) containing exactly 1 case + CC = ISD::SETEQ; + LHS = SV; RHS = I->High; MHS = NULL; + } else { + CC = ISD::SETLE; + LHS = I->Low; MHS = SV; RHS = I->High; + } + CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock); + + // If emitting the first comparison, just call visitSwitchCase to emit the + // code into the current block. Otherwise, push the CaseBlock onto the + // vector to be later processed by SDISel, and insert the node's MBB + // before the next MBB. + if (CurBlock == CurMBB) + visitSwitchCase(CB); + else + SwitchCases.push_back(CB); + + CurBlock = FallThrough; + } + + return true; +} + +static inline bool areJTsAllowed(const TargetLowering &TLI) { + return !DisableJumpTables && + (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); +} + +static APInt ComputeRange(const APInt &First, const APInt &Last) { + APInt LastExt(Last), FirstExt(First); + uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; + LastExt.sext(BitWidth); FirstExt.sext(BitWidth); + return (LastExt - FirstExt + 1ULL); +} + +/// handleJTSwitchCase - Emit jumptable for current switch case range +bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + + const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue(); + + size_t TSize = 0; + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) + TSize += I->size(); + + if (!areJTsAllowed(TLI) || TSize <= 3) + return false; + + APInt Range = ComputeRange(First, Last); + double Density = (double)TSize / Range.roundToDouble(); + if (Density < 0.4) + return false; + + DEBUG(errs() << "Lowering jump table\n" + << "First entry: " << First << ". Last entry: " << Last << '\n' + << "Range: " << Range + << "Size: " << TSize << ". Density: " << Density << "\n\n"); + + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CR.CaseBB; + + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + // Create a new basic block to hold the code for loading the address + // of the jump table, and jumping to it. Update successor information; + // we will either branch to the default case for the switch, or the jump + // table. + MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, JumpTableBB); + CR.CaseBB->addSuccessor(Default); + CR.CaseBB->addSuccessor(JumpTableBB); + + // Build a vector of destination BBs, corresponding to each target + // of the jump table. If the value of the jump table slot corresponds to + // a case statement, push the case's BB onto the vector, otherwise, push + // the default BB. 
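The profitability test in handleJTSwitchCase above boils down to two numbers: more than three case values, and at least 40% density over the span from the first to the last value. A minimal standalone rendering of that check, simplified to single-valued cases and not part of the patch:

    #include <cstdio>

    static bool worthJumpTable(unsigned numCaseValues, long long first, long long last) {
      if (numCaseValues <= 3)
        return false;
      double range = double(last - first + 1);
      double density = double(numCaseValues) / range;
      return density >= 0.4;
    }

    int main() {
      std::printf("%d\n", worthJumpTable(5, 1, 8));     // dense: 5/8 = 0.625 -> 1
      std::printf("%d\n", worthJumpTable(5, 1, 1000));  // sparse: 5/1000    -> 0
      return 0;
    }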
+ std::vector<MachineBasicBlock*> DestBBs; + APInt TEI = First; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { + const APInt& Low = cast<ConstantInt>(I->Low)->getValue(); + const APInt& High = cast<ConstantInt>(I->High)->getValue(); + + if (Low.sle(TEI) && TEI.sle(High)) { + DestBBs.push_back(I->BB); + if (TEI==High) + ++I; + } else { + DestBBs.push_back(Default); + } + } + + // Update successor info. Add one edge to each unique successor. + BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); + for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), + E = DestBBs.end(); I != E; ++I) { + if (!SuccsHandled[(*I)->getNumber()]) { + SuccsHandled[(*I)->getNumber()] = true; + JumpTableBB->addSuccessor(*I); + } + } + + // Create a jump table index for this jump table, or return an existing + // one. + unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs); + + // Set the jump table information so that we can codegen it as a second + // MachineBasicBlock + JumpTable JT(-1U, JTI, JumpTableBB, Default); + JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB)); + if (CR.CaseBB == CurMBB) + visitJumpTableHeader(JT, JTH); + + JTCases.push_back(JumpTableBlock(JTH, JT)); + + return true; +} + +/// handleBTSplitSwitchCase - emit comparison and split binary search tree into +/// 2 subtrees. +bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CR.CaseBB; + + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + // Size is the number of Cases represented by this range. + unsigned Size = CR.Range.second - CR.Range.first; + + const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue(); + double FMetric = 0; + CaseItr Pivot = CR.Range.first + Size/2; + + // Select optimal pivot, maximizing sum density of LHS and RHS. This will + // (heuristically) allow us to emit JumpTable's later. 
+ size_t TSize = 0; + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) + TSize += I->size(); + + size_t LSize = FrontCase.size(); + size_t RSize = TSize-LSize; + DEBUG(errs() << "Selecting best pivot: \n" + << "First: " << First << ", Last: " << Last <<'\n' + << "LSize: " << LSize << ", RSize: " << RSize << '\n'); + for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; + J!=E; ++I, ++J) { + const APInt& LEnd = cast<ConstantInt>(I->High)->getValue(); + const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue(); + APInt Range = ComputeRange(LEnd, RBegin); + assert((Range - 2ULL).isNonNegative() && + "Invalid case distance"); + double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble(); + double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble(); + double Metric = Range.logBase2()*(LDensity+RDensity); + // Should always split in some non-trivial place + DEBUG(errs() <<"=>Step\n" + << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' + << "LDensity: " << LDensity + << ", RDensity: " << RDensity << '\n' + << "Metric: " << Metric << '\n'); + if (FMetric < Metric) { + Pivot = J; + FMetric = Metric; + DEBUG(errs() << "Current metric set to: " << FMetric << '\n'); + } + + LSize += J->size(); + RSize -= J->size(); + } + if (areJTsAllowed(TLI)) { + // If our case is dense we *really* should handle it earlier! + assert((FMetric > 0) && "Should handle dense range earlier!"); + } else { + Pivot = CR.Range.first + Size/2; + } + + CaseRange LHSR(CR.Range.first, Pivot); + CaseRange RHSR(Pivot, CR.Range.second); + Constant *C = Pivot->Low; + MachineBasicBlock *FalseBB = 0, *TrueBB = 0; + + // We know that we branch to the LHS if the Value being switched on is + // less than the Pivot value, C. We use this to optimize our binary + // tree a bit, by recognizing that if SV is greater than or equal to the + // LHS's Case Value, and that Case Value is exactly one less than the + // Pivot's Value, then we can branch directly to the LHS's Target, + // rather than creating a leaf node for it. + if ((LHSR.second - LHSR.first) == 1 && + LHSR.first->High == CR.GE && + cast<ConstantInt>(C)->getValue() == + (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { + TrueBB = LHSR.first->BB; + } else { + TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, TrueBB); + WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } + + // Similar to the optimization above, if the Value being switched on is + // known to be less than the Constant CR.LT, and the current Case Value + // is CR.LT - 1, then we can branch directly to the target block for + // the current Case Value, rather than emitting a RHS leaf node for it. + if ((RHSR.second - RHSR.first) == 1 && CR.LT && + cast<ConstantInt>(RHSR.first->Low)->getValue() == + (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { + FalseBB = RHSR.first->BB; + } else { + FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, FalseBB); + WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } + + // Create a CaseBlock record representing a conditional branch to + // the LHS node if the value being switched on SV is less than C. + // Otherwise, branch to LHS. 
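The pivot loop above scores every split point as the log2 of the gap at the split times the sum of the left and right densities, and keeps the best one. The standalone sketch below, with made-up case values and floating-point log2 standing in for APInt::logBase2, shows the metric picking the split at the large gap; it is illustrative only and not from the commit:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
      // Hypothetical single-value cases, already sorted.
      std::vector<long long> values = {0, 1, 2, 3, 100, 101, 102, 103};
      size_t n = values.size(), bestIdx = n / 2;
      double bestMetric = 0;
      for (size_t j = 1; j < n; ++j) {                     // split before values[j]
        double gap    = double(values[j] - values[j - 1] + 1);
        double lDens  = double(j)     / double(values[j - 1] - values.front() + 1);
        double rDens  = double(n - j) / double(values.back() - values[j] + 1);
        double metric = std::log2(gap) * (lDens + rDens);
        if (metric > bestMetric) { bestMetric = metric; bestIdx = j; }
      }
      std::printf("best split before index %zu (value %lld)\n", bestIdx, values[bestIdx]);
      // Expect the split at the large gap between 3 and 100, i.e. before value 100.
      return 0;
    }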
+ CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + + if (CR.CaseBB == CurMBB) + visitSwitchCase(CB); + else + SwitchCases.push_back(CB); + + return true; +} + +/// handleBitTestsSwitchCase - if current case range has few destination and +/// range span less, than machine word bitwidth, encode case range into series +/// of masks and emit bit tests with these masks. +bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default){ + unsigned IntPtrBits = TLI.getPointerTy().getSizeInBits(); + + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // If target does not have legal shift left, do not emit bit tests at all. + if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) + return false; + + size_t numCmps = 0; + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) { + // Single case counts one, case range - two. + numCmps += (I->Low == I->High ? 1 : 2); + } + + // Count unique destinations + SmallSet<MachineBasicBlock*, 4> Dests; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { + Dests.insert(I->BB); + if (Dests.size() > 3) + // Don't bother the code below, if there are too much unique destinations + return false; + } + DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n' + << "Total number of comparisons: " << numCmps << '\n'); + + // Compute span of values. + const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue(); + APInt cmpRange = maxValue - minValue; + + DEBUG(errs() << "Compare range: " << cmpRange << '\n' + << "Low bound: " << minValue << '\n' + << "High bound: " << maxValue << '\n'); + + if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) || + (!(Dests.size() == 1 && numCmps >= 3) && + !(Dests.size() == 2 && numCmps >= 5) && + !(Dests.size() >= 3 && numCmps >= 6))) + return false; + + DEBUG(errs() << "Emitting bit tests\n"); + APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); + + // Optimize the case where all the case values fit in a + // word without having to subtract minValue. In this case, + // we can optimize away the subtraction. + if (minValue.isNonNegative() && + maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) { + cmpRange = maxValue; + } else { + lowBound = minValue; + } + + CaseBitsVector CasesBits; + unsigned i, count = 0; + + for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { + MachineBasicBlock* Dest = I->BB; + for (i = 0; i < count; ++i) + if (Dest == CasesBits[i].BB) + break; + + if (i == count) { + assert((count < 3) && "Too much destinations to test!"); + CasesBits.push_back(CaseBits(0, Dest, 0)); + count++; + } + + const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue(); + const APInt& highValue = cast<ConstantInt>(I->High)->getValue(); + + uint64_t lo = (lowValue - lowBound).getZExtValue(); + uint64_t hi = (highValue - lowBound).getZExtValue(); + + for (uint64_t j = lo; j <= hi; j++) { + CasesBits[i].Mask |= 1ULL << j; + CasesBits[i].Bits++; + } + + } + std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); + + BitTestInfo BTC; + + // Figure out which block is immediately after the current one. 
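The loop just above packs every case value that shares a destination into a word-sized bit mask, so each destination later costs one shift, one AND and one conditional branch. A standalone sketch of the resulting dispatch, with hypothetical case values and masks, not taken from this patch:

    #include <cassert>
    #include <cstdint>

    static int dispatch(unsigned x) {
      const unsigned First = 1, Range = 8;                 // case values span 1..9
      // Destination A handles {1,3,5,9}, destination B handles {2,4,8};
      // masks are built over (value - First), mirroring CasesBits[i].Mask above.
      const uint64_t MaskA = 1ULL << 0 | 1ULL << 2 | 1ULL << 4 | 1ULL << 8;
      const uint64_t MaskB = 1ULL << 1 | 1ULL << 3 | 1ULL << 7;

      unsigned idx = x - First;                            // subtract the minimum value
      if (idx > Range) return 0;                           // range check -> default
      uint64_t bit = 1ULL << idx;                          // SHL of 1 by the adjusted value
      if (bit & MaskA) return 1;                           // AND + SETNE + BRCOND
      if (bit & MaskB) return 2;
      return 0;                                            // default
    }

    int main() {
      assert(dispatch(3) == 1 && dispatch(8) == 2 && dispatch(6) == 0 && dispatch(42) == 0);
      return 0;
    }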
+ MachineFunction::iterator BBI = CR.CaseBB; + ++BBI; + + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + DEBUG(errs() << "Cases:\n"); + for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { + DEBUG(errs() << "Mask: " << CasesBits[i].Mask + << ", Bits: " << CasesBits[i].Bits + << ", BB: " << CasesBits[i].BB << '\n'); + + MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, CaseBB); + BTC.push_back(BitTestCase(CasesBits[i].Mask, + CaseBB, + CasesBits[i].BB)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } + + BitTestBlock BTB(lowBound, cmpRange, SV, + -1U, (CR.CaseBB == CurMBB), + CR.CaseBB, Default, BTC); + + if (CR.CaseBB == CurMBB) + visitBitTestHeader(BTB); + + BitTestCases.push_back(BTB); + + return true; +} + + +/// Clusterify - Transform simple list of Cases into list of CaseRange's +size_t SelectionDAGLowering::Clusterify(CaseVector& Cases, + const SwitchInst& SI) { + size_t numCmps = 0; + + // Start with "simple" cases + for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { + MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)]; + Cases.push_back(Case(SI.getSuccessorValue(i), + SI.getSuccessorValue(i), + SMBB)); + } + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size() >= 2) + // Must recompute end() each iteration because it may be + // invalidated by erase if we hold on to it + for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { + const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); + const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); + MachineBasicBlock* nextBB = J->BB; + MachineBasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { + I->High = J->High; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; + } + + return numCmps; +} + +void SelectionDAGLowering::visitSwitch(SwitchInst &SI) { + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + + MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; + + // If there is only the default destination, branch to it if it is not the + // next basic block. Otherwise, just fall through. + if (SI.getNumOperands() == 2) { + // Update machine-CFG edges. + + // If this is not a fall-through branch, emit the branch. + CurMBB->addSuccessor(Default); + if (Default != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Default))); + return; + } + + // If there are any non-default case statements, create a vector of Cases + // representing each one, and sort the vector so that we can efficiently + // create a binary search tree from them. + CaseVector Cases; + size_t numCmps = Clusterify(Cases, SI); + DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << '\n'); + numCmps = 0; + + // Get the Value to be switched on and default basic blocks, which will be + // inserted into CaseBlock records, representing basic blocks in the binary + // search tree. 
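Clusterify above first emits one Case per successor value, sorts them, and then merges neighbours that are contiguous and share a destination, so that a later range costs only two compares. A standalone sketch of that merging step, with hypothetical values and not part of the commit:

    #include <cstdio>
    #include <utility>
    #include <vector>

    struct Cluster { long long low, high; int dest; };

    int main() {
      // Hypothetical (value, destination) pairs, already sorted by value.
      std::vector<std::pair<long long, int>> cases =
          {{1, 0}, {2, 0}, {3, 0}, {7, 1}, {8, 1}, {20, 0}};
      std::vector<Cluster> clusters;
      for (auto &c : cases) {
        if (!clusters.empty() && clusters.back().dest == c.second &&
            clusters.back().high + 1 == c.first)
          clusters.back().high = c.first;          // merge into the previous range
        else
          clusters.push_back({c.first, c.first, c.second});
      }
      // Expected output, one cluster per line:
      //   [1, 3] -> dest 0   [7, 8] -> dest 1   [20, 20] -> dest 0
      for (auto &cl : clusters)
        std::printf("[%lld, %lld] -> dest %d\n", cl.low, cl.high, cl.dest);
      return 0;
    }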
+ Value *SV = SI.getOperand(0); + + // Push the initial CaseRec onto the worklist + CaseRecVector WorkList; + WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end()))); + + while (!WorkList.empty()) { + // Grab a record representing a case range to process off the worklist + CaseRec CR = WorkList.back(); + WorkList.pop_back(); + + if (handleBitTestsSwitchCase(CR, WorkList, SV, Default)) + continue; + + // If the range has few cases (two or less) emit a series of specific + // tests. + if (handleSmallSwitchRange(CR, WorkList, SV, Default)) + continue; + + // If the switch has more than 5 blocks, and at least 40% dense, and the + // target supports indirect branches, then emit a jump table rather than + // lowering the switch to a binary tree of conditional branches. + if (handleJTSwitchCase(CR, WorkList, SV, Default)) + continue; + + // Emit binary tree. We need to pick a pivot, and push left and right ranges + // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. + handleBTSplitSwitchCase(CR, WorkList, SV, Default); + } +} + + +void SelectionDAGLowering::visitSub(User &I) { + // -0.0 - X --> fneg + const Type *Ty = I.getType(); + if (isa<VectorType>(Ty)) { + if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { + const VectorType *DestTy = cast<VectorType>(I.getType()); + const Type *ElTy = DestTy->getElementType(); + if (ElTy->isFloatingPoint()) { + unsigned VL = DestTy->getNumElements(); + std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); + Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); + if (CV == CNZ) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } + } + } + } + if (Ty->isFloatingPoint()) { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) + if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } + } + + visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB); +} + +void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + if (!isa<VectorType>(I.getType()) && + Op2.getValueType() != TLI.getShiftAmountTy()) { + // If the operand is smaller than the shift count type, promote it. + if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // If the operand is larger than the shift count type but the shift + // count type has enough bits to represent any shift value, truncate + // it now. This is a common case and it exposes the truncate to + // optimization early. + else if (TLI.getShiftAmountTy().getSizeInBits() >= + Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // Otherwise we'll need to temporarily settle for some other + // convenient type; type legalization will make adjustments as + // needed. 
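The special case at the top of visitSub above rewrites (-0.0 - X) into an FNEG. The reason only negative zero qualifies is visible in a two-line standalone experiment, not from the patch: subtracting from -0.0 is exact IEEE negation even for X equal to +0.0, while 0.0 - X would lose the sign there.

    #include <cmath>
    #include <cstdio>

    int main() {
      double x = 0.0;
      std::printf("%g %g %g\n", -0.0 - x, 0.0 - x, -x);                       // -0 0 -0
      std::printf("%d %d\n", std::signbit(-0.0 - x), std::signbit(0.0 - x));  // 1 0
      return 0;
    }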
+ else if (TLI.getPointerTy().bitsLT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), Op2); + else if (TLI.getPointerTy().bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), Op2); + } + + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGLowering::visitICmp(User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (ICmpInst *IC = dyn_cast<ICmpInst>(&I)) + predicate = IC->getPredicate(); + else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode = getICmpCondCode(predicate); + setValue(&I, DAG.getSetCC(getCurDebugLoc(),MVT::i1, Op1, Op2, Opcode)); +} + +void SelectionDAGLowering::visitFCmp(User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (FCmpInst *FC = dyn_cast<FCmpInst>(&I)) + predicate = FC->getPredicate(); + else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition = getFCmpCondCode(predicate); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Condition)); +} + +void SelectionDAGLowering::visitVICmp(User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (VICmpInst *IC = dyn_cast<VICmpInst>(&I)) + predicate = IC->getPredicate(); + else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode = getICmpCondCode(predicate); + setValue(&I, DAG.getVSetCC(getCurDebugLoc(), Op1.getValueType(), + Op1, Op2, Opcode)); +} + +void SelectionDAGLowering::visitVFCmp(User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (VFCmpInst *FC = dyn_cast<VFCmpInst>(&I)) + predicate = FC->getPredicate(); + else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition = getFCmpCondCode(predicate); + MVT DestVT = TLI.getValueType(I.getType()); + + setValue(&I, DAG.getVSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); +} + +void SelectionDAGLowering::visitSelect(User &I) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues != 0) { + SmallVector<SDValue, 4> Values(NumValues); + SDValue Cond = getValue(I.getOperand(0)); + SDValue TrueVal = getValue(I.getOperand(1)); + SDValue FalseVal = getValue(I.getOperand(2)); + + for (unsigned i = 0; i != NumValues; ++i) + Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), + TrueVal.getValueType(), Cond, + SDValue(TrueVal.getNode(), TrueVal.getResNo() + i), + SDValue(FalseVal.getNode(), FalseVal.getResNo() + i)); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); + } +} + + +void SelectionDAGLowering::visitTrunc(User &I) { + // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 
+ SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitZExt(User &I) { + // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // ZExt also can't be a cast to bool for same reason. So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitSExt(User &I) { + // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // SExt also can't be a cast to bool for same reason. So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitFPTrunc(User &I) { + // FPTrunc is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), + DestVT, N, DAG.getIntPtrConstant(0))); +} + +void SelectionDAGLowering::visitFPExt(User &I){ + // FPTrunc is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitFPToUI(User &I) { + // FPToUI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitFPToSI(User &I) { + // FPToSI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitUIToFP(User &I) { + // UIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitSIToFP(User &I){ + // SIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitPtrToInt(User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. + SDValue N = getValue(I.getOperand(0)); + MVT SrcVT = N.getValueType(); + MVT DestVT = TLI.getValueType(I.getType()); + SDValue Result; + if (DestVT.bitsLT(SrcVT)) + Result = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N); + else + // Note: ZERO_EXTEND can handle cases where the sizes are equal too + Result = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N); + setValue(&I, Result); +} + +void SelectionDAGLowering::visitIntToPtr(User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. 
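visitPtrToInt above, and visitIntToPtr just below, pick between TRUNCATE and ZERO_EXTEND purely from the relative sizes; the conversion is never sign-extending. A minimal standalone illustration, not taken from this commit:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t wide = 0xFFFFFFFF80000000ULL;
      uint32_t narrowed = uint32_t(wide);                   // TRUNCATE
      uint64_t widened  = uint64_t(uint32_t(0x80000000u));  // ZERO_EXTEND, not sign-extend
      std::printf("%#x %#llx\n", (unsigned)narrowed, (unsigned long long)widened);
      // Prints: 0x80000000 0x80000000
      return 0;
    }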
+ SDValue N = getValue(I.getOperand(0)); + MVT SrcVT = N.getValueType(); + MVT DestVT = TLI.getValueType(I.getType()); + if (DestVT.bitsLT(SrcVT)) + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); + else + // Note: ZERO_EXTEND can handle cases where the sizes are equal too + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + DestVT, N)); +} + +void SelectionDAGLowering::visitBitCast(User &I) { + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + + // BitCast assures us that source and destination are the same size so this + // is either a BIT_CONVERT or a no-op. + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + DestVT, N)); // convert types + else + setValue(&I, N); // noop cast. +} + +void SelectionDAGLowering::visitInsertElement(User &I) { + SDValue InVec = getValue(I.getOperand(0)); + SDValue InVal = getValue(I.getOperand(1)); + SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), + getValue(I.getOperand(2))); + + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), + InVec, InVal, InIdx)); +} + +void SelectionDAGLowering::visitExtractElement(User &I) { + SDValue InVec = getValue(I.getOperand(0)); + SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), + getValue(I.getOperand(1))); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), InVec, InIdx)); +} + + +// Utility for visitShuffleVector - Returns true if the mask is mask starting +// from SIndx and increasing to the element length (undefs are allowed). +static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { + unsigned MaskNumElts = Mask.size(); + for (unsigned i = 0; i != MaskNumElts; ++i) + if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx))) + return false; + return true; +} + +void SelectionDAGLowering::visitShuffleVector(User &I) { + SmallVector<int, 8> Mask; + SDValue Src1 = getValue(I.getOperand(0)); + SDValue Src2 = getValue(I.getOperand(1)); + + // Convert the ConstantVector mask operand into an array of ints, with -1 + // representing undef values. + SmallVector<Constant*, 8> MaskElts; + cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts); + unsigned MaskNumElts = MaskElts.size(); + for (unsigned i = 0; i != MaskNumElts; ++i) { + if (isa<UndefValue>(MaskElts[i])) + Mask.push_back(-1); + else + Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue()); + } + + MVT VT = TLI.getValueType(I.getType()); + MVT SrcVT = Src1.getValueType(); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); + + if (SrcNumElts == MaskNumElts) { + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &Mask[0])); + return; + } + + // Normalize the shuffle vector since mask and vector length don't match. + if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { + // Mask is longer than the source vectors and is a multiple of the source + // vectors. We can use concatenate vector to make the mask and vectors + // lengths match. + if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src1, Src2)); + return; + } + + // Pad both vectors with undefs to make them the same length as the mask. 
+ unsigned NumConcat = MaskNumElts / SrcNumElts; + bool Src1U = Src1.getOpcode() == ISD::UNDEF; + bool Src2U = Src2.getOpcode() == ISD::UNDEF; + SDValue UndefVal = DAG.getUNDEF(SrcVT); + + SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); + SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal); + MOps1[0] = Src1; + MOps2[0] = Src2; + + Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, + getCurDebugLoc(), VT, + &MOps1[0], NumConcat); + Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, + getCurDebugLoc(), VT, + &MOps2[0], NumConcat); + + // Readjust mask for new input vector length. + SmallVector<int, 8> MappedOps; + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < (int)SrcNumElts) + MappedOps.push_back(Idx); + else + MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); + } + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); + return; + } + + if (SrcNumElts > MaskNumElts) { + // Analyze the access pattern of the vector to see if we can extract + // two subvectors and do the shuffle. The analysis is done by calculating + // the range of elements the mask access on both vectors. + int MinRange[2] = { SrcNumElts+1, SrcNumElts+1}; + int MaxRange[2] = {-1, -1}; + + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + int Input = 0; + if (Idx < 0) + continue; + + if (Idx >= (int)SrcNumElts) { + Input = 1; + Idx -= SrcNumElts; + } + if (Idx > MaxRange[Input]) + MaxRange[Input] = Idx; + if (Idx < MinRange[Input]) + MinRange[Input] = Idx; + } + + // Check if the access is smaller than the vector size and can we find + // a reasonable extract index. + int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not Extract. + int StartIdx[2]; // StartIdx to extract from + for (int Input=0; Input < 2; ++Input) { + if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { + RangeUse[Input] = 0; // Unused + StartIdx[Input] = 0; + } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { + // Fits within range but we should see if we can find a good + // start index that is a multiple of the mask length. + if (MaxRange[Input] < (int)MaskNumElts) { + RangeUse[Input] = 1; // Extract from beginning of the vector + StartIdx[Input] = 0; + } else { + StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; + if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && + StartIdx[Input] + MaskNumElts < SrcNumElts) + RangeUse[Input] = 1; // Extract from a multiple of the mask length. + } + } + } + + if (RangeUse[0] == 0 && RangeUse[0] == 0) { + setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. + return; + } + else if (RangeUse[0] < 2 && RangeUse[1] < 2) { + // Extract appropriate subvector and generate a vector shuffle + for (int Input=0; Input < 2; ++Input) { + SDValue& Src = Input == 0 ? Src1 : Src2; + if (RangeUse[Input] == 0) { + Src = DAG.getUNDEF(VT); + } else { + Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT, + Src, DAG.getIntPtrConstant(StartIdx[Input])); + } + } + // Calculate new mask. 
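When the mask is longer than the sources, the code above widens both sources with CONCAT_VECTORS and then rewrites mask entries that referred to the second source. The standalone sketch below reproduces just that index rewrite for a hypothetical 2-element source and 4-element mask; it is illustrative only and not part of the patch:

    #include <cstdio>
    #include <vector>

    int main() {
      const int SrcNumElts = 2, MaskNumElts = 4;
      std::vector<int> mask = {0, 2, 1, 3};        // -1 would mean undef and stays as-is
      std::vector<int> mapped;
      for (int idx : mask) {
        if (idx < SrcNumElts)
          mapped.push_back(idx);                            // still indexes Src1
        else
          mapped.push_back(idx + MaskNumElts - SrcNumElts); // now indexes the widened Src2
      }
      for (int idx : mapped) std::printf("%d ", idx);       // 0 4 1 5
      std::printf("\n");
      return 0;
    }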
+ SmallVector<int, 8> MappedOps; + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < 0) + MappedOps.push_back(Idx); + else if (Idx < (int)SrcNumElts) + MappedOps.push_back(Idx - StartIdx[0]); + else + MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + } + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); + return; + } + } + + // We can't use either concat vectors or extract subvectors so fall back to + // replacing the shuffle with extract and build vector. + // to insert and build vector. + MVT EltVT = VT.getVectorElementType(); + MVT PtrVT = TLI.getPointerTy(); + SmallVector<SDValue,8> Ops; + for (unsigned i = 0; i != MaskNumElts; ++i) { + if (Mask[i] < 0) { + Ops.push_back(DAG.getUNDEF(EltVT)); + } else { + int Idx = Mask[i]; + if (Idx < (int)SrcNumElts) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src1, DAG.getConstant(Idx, PtrVT))); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src2, + DAG.getConstant(Idx - SrcNumElts, PtrVT))); + } + } + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size())); +} + +void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) { + const Value *Op0 = I.getOperand(0); + const Value *Op1 = I.getOperand(1); + const Type *AggTy = I.getType(); + const Type *ValTy = Op1->getType(); + bool IntoUndef = isa<UndefValue>(Op0); + bool FromUndef = isa<UndefValue>(Op1); + + unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, + I.idx_begin(), I.idx_end()); + + SmallVector<MVT, 4> AggValueVTs; + ComputeValueVTs(TLI, AggTy, AggValueVTs); + SmallVector<MVT, 4> ValValueVTs; + ComputeValueVTs(TLI, ValTy, ValValueVTs); + + unsigned NumAggValues = AggValueVTs.size(); + unsigned NumValValues = ValValueVTs.size(); + SmallVector<SDValue, 4> Values(NumAggValues); + + SDValue Agg = getValue(Op0); + SDValue Val = getValue(Op1); + unsigned i = 0; + // Copy the beginning value(s) from the original aggregate. + for (; i != LinearIndex; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + // Copy values from the inserted value(s). + for (; i != LinearIndex + NumValValues; ++i) + Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + // Copy remaining value(s) from the original aggregate. + for (; i != NumAggValues; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&AggValueVTs[0], NumAggValues), + &Values[0], NumAggValues)); +} + +void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) { + const Value *Op0 = I.getOperand(0); + const Type *AggTy = Op0->getType(); + const Type *ValTy = I.getType(); + bool OutOfUndef = isa<UndefValue>(Op0); + + unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, + I.idx_begin(), I.idx_end()); + + SmallVector<MVT, 4> ValValueVTs; + ComputeValueVTs(TLI, ValTy, ValValueVTs); + + unsigned NumValValues = ValValueVTs.size(); + SmallVector<SDValue, 4> Values(NumValValues); + + SDValue Agg = getValue(Op0); + // Copy out the selected value(s). + for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) + Values[i - LinearIndex] = + OutOfUndef ? 
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValValueVTs[0], NumValValues), + &Values[0], NumValValues)); +} + + +void SelectionDAGLowering::visitGetElementPtr(User &I) { + SDValue N = getValue(I.getOperand(0)); + const Type *Ty = I.getOperand(0)->getType(); + + for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end(); + OI != E; ++OI) { + Value *Idx = *OI; + if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + DAG.getIntPtrConstant(Offset)); + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->getZExtValue() == 0) continue; + uint64_t Offs = + TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + SDValue OffsVal; + unsigned PtrBits = TLI.getPointerTy().getSizeInBits(); + if (PtrBits < 64) { + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(Offs, MVT::i64)); + } else + OffsVal = DAG.getIntPtrConstant(Offs); + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + OffsVal); + continue; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = TD->getTypeAllocSize(Ty); + SDValue IdxN = getValue(Idx); + + // If the index is smaller or larger than intptr_t, truncate or extend + // it. + if (IdxN.getValueType().bitsLT(N.getValueType())) + IdxN = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), + N.getValueType(), IdxN); + else if (IdxN.getValueType().bitsGT(N.getValueType())) + IdxN = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + N.getValueType(), IdxN); + + // If this is a multiply by a power of two, turn it into a shl + // immediately. This is a very common case. + if (ElementSize != 1) { + if (isPowerOf2_64(ElementSize)) { + unsigned Amt = Log2_64(ElementSize); + IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), + N.getValueType(), IdxN, + DAG.getConstant(Amt, TLI.getPointerTy())); + } else { + SDValue Scale = DAG.getIntPtrConstant(ElementSize); + IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), + N.getValueType(), IdxN, Scale); + } + } + + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), + N.getValueType(), N, IdxN); + } + } + setValue(&I, N); +} + +void SelectionDAGLowering::visitAlloca(AllocaInst &I) { + // If this is a fixed sized alloca in the entry block of the function, + // allocate it statically on the stack. + if (FuncInfo.StaticAllocaMap.count(&I)) + return; // getValue will auto-populate this. 
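The address arithmetic in visitGetElementPtr above boils down to a running byte offset: constant struct-field offsets are folded in directly, and array indices are scaled by the element's allocation size, emitted as a shift when that size is a power of two. A scalar model (illustrative; the struct, its layout, and the offset/size value 8 are assumptions):

  #include <cstdint>

  // Models lowering of  getelementptr %struct.S* %p, 0, 1, %i  for a hypothetical
  //   struct S { int32_t a; double b[4]; };
  // assuming field 'b' sits at byte offset 8 and the element size of double is 8.
  static uint64_t gepAddress(uint64_t Base, int64_t i) {
    uint64_t Addr = Base;
    Addr += 8;                   // constant field offset from the struct layout
    Addr += (uint64_t)i << 3;    // i * 8, emitted as SHL because 8 is a power of two
    return Addr;
  }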
+ + const Type *Ty = I.getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + I.getAlignment()); + + SDValue AllocSize = getValue(I.getArraySize()); + + AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(), + AllocSize, + DAG.getConstant(TySize, AllocSize.getValueType())); + + + + MVT IntPtr = TLI.getPointerTy(); + if (IntPtr.bitsLT(AllocSize.getValueType())) + AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + IntPtr, AllocSize); + else if (IntPtr.bitsGT(AllocSize.getValueType())) + AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + IntPtr, AllocSize); + + // Handle alignment. If the requested alignment is less than or equal to + // the stack alignment, ignore it. If the size is greater than or equal to + // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. + unsigned StackAlign = + TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + if (Align <= StackAlign) + Align = 0; + + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. + AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(StackAlign-1)); + // Mask out the low bits for alignment purposes. + AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); + + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(), + VTs, Ops, 3); + setValue(&I, DSA); + DAG.setRoot(DSA.getValue(1)); + + // Inform the Frame Information that we have just allocated a variable-sized + // object. + CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject(); +} + +void SelectionDAGLowering::visitLoad(LoadInst &I) { + const Value *SV = I.getOperand(0); + SDValue Ptr = getValue(SV); + + const Type *Ty = I.getType(); + bool isVolatile = I.isVolatile(); + unsigned Alignment = I.getAlignment(); + + SmallVector<MVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + SDValue Root; + bool ConstantMemory = false; + if (I.isVolatile()) + // Serialize volatile loads with other side effects. + Root = getRoot(); + else if (AA->pointsToConstantMemory(SV)) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. 
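The ADD/AND pair at the end of visitAlloca above is the standard round-up-to-alignment idiom. A scalar equivalent (illustrative only; Align must be a power of two):

  #include <cstdint>

  static uint64_t roundUpToAlign(uint64_t Size, uint64_t Align) {
    return (Size + Align - 1) & ~(Align - 1);
  }

  // roundUpToAlign(20, 16) == 32;  roundUpToAlign(32, 16) == 32.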
+ Root = DAG.getRoot(); + } + + SmallVector<SDValue, 4> Values(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + MVT PtrVT = Ptr.getValueType(); + for (unsigned i = 0; i != NumValues; ++i) { + SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + SV, Offsets[i], + isVolatile, Alignment); + Values[i] = L; + Chains[i] = L.getValue(1); + } + + if (!ConstantMemory) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, + &Chains[0], NumValues); + if (isVolatile) + DAG.setRoot(Chain); + else + PendingLoads.push_back(Chain); + } + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); +} + + +void SelectionDAGLowering::visitStore(StoreInst &I) { + Value *SrcV = I.getOperand(0); + Value *PtrV = I.getOperand(1); + + SmallVector<MVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + // Get the lowered operands. Note that we do this after + // checking if NumResults is zero, because with zero results + // the operands won't have values in the map. + SDValue Src = getValue(SrcV); + SDValue Ptr = getValue(PtrV); + + SDValue Root = getRoot(); + SmallVector<SDValue, 4> Chains(NumValues); + MVT PtrVT = Ptr.getValueType(); + bool isVolatile = I.isVolatile(); + unsigned Alignment = I.getAlignment(); + for (unsigned i = 0; i != NumValues; ++i) + Chains[i] = DAG.getStore(Root, getCurDebugLoc(), + SDValue(Src.getNode(), Src.getResNo() + i), + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + PtrV, Offsets[i], + isVolatile, Alignment); + + DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues)); +} + +/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC +/// node. +void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, + unsigned Intrinsic) { + bool HasChain = !I.doesNotAccessMemory(); + bool OnlyLoad = HasChain && I.onlyReadsMemory(); + + // Build the operand list. + SmallVector<SDValue, 8> Ops; + if (HasChain) { // If this intrinsic has side-effects, chainify it. + if (OnlyLoad) { + // We don't need to serialize loads against other loads. + Ops.push_back(DAG.getRoot()); + } else { + Ops.push_back(getRoot()); + } + } + + // Info is set by getTgtMemInstrinsic + TargetLowering::IntrinsicInfo Info; + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + + // Add the intrinsic ID as an integer operand if it's not a target intrinsic. + if (!IsTgtIntrinsic) + Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + + // Add all operands of the call to the operand list. 
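visitLoad and visitStore above split an aggregate access into one memory operation per first-class member, at the byte offsets reported by ComputeValueVTs, and then tie the per-member chains back together with a single TokenFactor. For a hypothetical two-member aggregate the offsets work out as below (the C struct and its layout are an assumption used only for illustration):

  #include <cstddef>
  #include <cstdint>

  struct Pair { int32_t a; float b; };   // stands in for an IR aggregate {i32, float}
  // With the usual 4-byte layout the lowering above would emit one load/store at
  // Ptr+0 (i32) and one at Ptr+4 (f32), then merge the two chains with TokenFactor.
  static_assert(offsetof(Pair, b) == 4, "illustrative layout assumption");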
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { + SDValue Op = getValue(I.getOperand(i)); + assert(TLI.isTypeLegal(Op.getValueType()) && + "Intrinsic uses a non-legal type?"); + Ops.push_back(Op); + } + + std::vector<MVT> VTArray; + if (I.getType() != Type::VoidTy) { + MVT VT = TLI.getValueType(I.getType()); + if (VT.isVector()) { + const VectorType *DestTy = cast<VectorType>(I.getType()); + MVT EltVT = TLI.getValueType(DestTy->getElementType()); + + VT = MVT::getVectorVT(EltVT, DestTy->getNumElements()); + assert(VT != MVT::Other && "Intrinsic uses a non-legal type?"); + } + + assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?"); + VTArray.push_back(VT); + } + if (HasChain) + VTArray.push_back(MVT::Other); + + SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size()); + + // Create the node. + SDValue Result; + if (IsTgtIntrinsic) { + // This is target intrinsic that touches memory + Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), + VTs, &Ops[0], Ops.size(), + Info.memVT, Info.ptrVal, Info.offset, + Info.align, Info.vol, + Info.readMem, Info.writeMem); + } + else if (!HasChain) + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + else if (I.getType() != Type::VoidTy) + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + else + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + + if (HasChain) { + SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); + if (OnlyLoad) + PendingLoads.push_back(Chain); + else + DAG.setRoot(Chain); + } + if (I.getType() != Type::VoidTy) { + if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { + MVT VT = TLI.getValueType(PTy); + Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); + } + setValue(&I, Result); + } +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +static GlobalVariable *ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalVariable *GV = dyn_cast<GlobalVariable>(V); + assert ((GV || isa<ConstantPointerNull>(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +namespace llvm { + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB) { + // Inform the MachineModuleInfo of the personality for this landing pad. + ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); + assert(CE->getOpcode() == Instruction::BitCast && + isa<Function>(CE->getOperand(0)) && + "Personality should be a function"); + MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0))); + + // Gather all the type infos for this landing pad and pass them along to + // MachineModuleInfo. + std::vector<GlobalVariable *> TyInfo; + unsigned N = I.getNumOperands(); + + for (unsigned i = N - 1; i > 2; --i) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { + unsigned FilterLength = CI->getZExtValue(); + unsigned FirstCatch = i + FilterLength + !FilterLength; + assert (FirstCatch <= N && "Invalid filter length"); + + if (FirstCatch < N) { + TyInfo.reserve(N - FirstCatch); + for (unsigned j = FirstCatch; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + if (!FilterLength) { + // Cleanup. + MMI->addCleanup(MBB); + } else { + // Filter. 
+ TyInfo.reserve(FilterLength - 1); + for (unsigned j = i + 1; j < FirstCatch; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addFilterTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + N = i; + } + } + + if (N > 3) { + TyInfo.reserve(N - 3); + for (unsigned j = 3; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + } +} + +} + +/// GetSignificand - Get the significand and build it into a floating-point +/// number with exponent of 1: +/// +/// Op = (Op & 0x007fffff) | 0x3f800000; +/// +/// where Op is the hexidecimal representation of floating point value. +static SDValue +GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { + SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, + DAG.getConstant(0x007fffff, MVT::i32)); + SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, + DAG.getConstant(0x3f800000, MVT::i32)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); +} + +/// GetExponent - Get the exponent: +/// +/// (float)(int)(((Op & 0x7f800000) >> 23) - 127); +/// +/// where Op is the hexidecimal representation of floating point value. +static SDValue +GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, + DebugLoc dl) { + SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, + DAG.getConstant(0x7f800000, MVT::i32)); + SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, + DAG.getConstant(23, TLI.getPointerTy())); + SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, + DAG.getConstant(127, MVT::i32)); + return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); +} + +/// getF32Constant - Get 32-bit floating point constant. +static SDValue +getF32Constant(SelectionDAG &DAG, unsigned Flt) { + return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); +} + +/// Inlined utility function to implement binary input atomic intrinsics for +/// visitIntrinsicCall: I is a call instruction +/// Op is the associated NodeType for I +const char * +SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { + SDValue Root = getRoot(); + SDValue L = + DAG.getAtomic(Op, getCurDebugLoc(), + getValue(I.getOperand(2)).getValueType().getSimpleVT(), + Root, + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + I.getOperand(1)); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; +} + +// implVisitAluOverflow - Lower arithmetic overflow instrinsics. +const char * +SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2); + + setValue(&I, Result); + return 0; +} + +/// visitExp - Lower an exp intrinsic. Handles the special sequences for +/// limited-precision mode. 
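GetSignificand and GetExponent above work directly on the IEEE-754 bit pattern of an f32. The same arithmetic in plain C++ (illustrative sketch for normal, positive inputs; the helper names are made up):

  #include <cstdint>
  #include <cstring>

  static float significandOf(float f) {          // (bits & 0x007fffff) | 0x3f800000
    uint32_t Bits; std::memcpy(&Bits, &f, 4);
    Bits = (Bits & 0x007fffffu) | 0x3f800000u;   // keep the mantissa, force a zero exponent
    std::memcpy(&f, &Bits, 4);
    return f;                                    // result lies in [1.0, 2.0)
  }

  static float exponentOf(float f) {             // (float)(int)(((bits & 0x7f800000) >> 23) - 127)
    uint32_t Bits; std::memcpy(&Bits, &f, 4);
    return (float)((int)((Bits & 0x7f800000u) >> 23) - 127);
  }

  // For f = 12.0f:  exponentOf(f) == 3.0f  and  significandOf(f) == 1.5f  (12 = 1.5 * 2^3).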
+void +SelectionDAGLowering::visitExp(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OFe 1.4426950f + // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x3fb8aa3b)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5); + + // Add the exponent into the result in integer domain. + SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // 0.000107046256 error, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7); + + // Add the exponent into the result in integer domain. 
+ SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::i32, t13); + + // Add the exponent into the result in integer domain. + SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog - Lower a log intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitLog(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Scale the exponent by log(2) [0.69314718f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3f317218)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. 
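The limited-precision path in visitExp above computes exp(x) as 2^(x * log2 e): the product is split into integer and fractional parts, the fraction is fed to a small minimax polynomial for 2^f, and the integer part is added straight into the exponent field of the result. A scalar sketch of the 6-bit variant, reusing the constants quoted in the comments (illustrative only, not a libm replacement; assumes the result stays in the normal float range):

  #include <cstdint>
  #include <cstring>

  static float exp6bit(float x) {
    float t = x * 1.4426950f;                    // x * log2(e)
    int IntPart = (int)t;                        // truncation, like FP_TO_SINT
    float f = t - (float)IntPart;                // fractional part
    // degree-2 minimax polynomial for 2^f, same coefficients as the 6-bit case above
    float p = 0.997535578f + (0.735607626f + 0.252464424f * f) * f;
    uint32_t Bits; std::memcpy(&Bits, &p, 4);
    Bits += (uint32_t)IntPart << 23;             // fold IntPart into the exponent field
    std::memcpy(&p, &Bits, 4);
    return p;
  }

  // exp6bit(1.0f) lands within a couple of percent of e, matching the quoted 6-bit error bound.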
+ SDValue X = GetSignificand(DAG, Op1, dl); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // LogofMantissa = + // -1.1609546f + + // (1.4034025f - 0.23903021f * x) * x; + // + // error 0.0034276066, which is better than 8 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbe74c456)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3fb3a2b1)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f949a29)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // LogOfMantissa = + // -1.7417939f + + // (2.8212026f + + // (-1.4699568f + + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; + // + // error 0.000061011436, which is 14 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbd67b6d6)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ee4f4b8)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fbc278b)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40348e95)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fdef31a)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // LogOfMantissa = + // -2.1072184f + + // (4.2372794f + + // (-3.7029485f + + // (2.2781945f + + // (-0.87823314f + + // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; + // + // error 0.0000023660568, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbc91e5ac)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e4350aa)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f60d3e3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x4011cdf0)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x406cfd1c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x408797cb)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4006dcab)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for +/// limited-precision mode. 
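visitLog above mirrors that decomposition for log(x): the unbiased exponent contributes exponent * ln 2, and a short polynomial evaluated on the significand (a value in [1, 2)) supplies the rest. A scalar sketch of the 6-bit variant with the same coefficients (illustrative only; valid for normal, positive inputs):

  #include <cstdint>
  #include <cstring>

  static float log6bit(float x) {
    uint32_t Bits; std::memcpy(&Bits, &x, 4);
    float e = (float)((int)((Bits >> 23) & 0xffu) - 127);    // unbiased exponent
    uint32_t MBits = (Bits & 0x007fffffu) | 0x3f800000u;
    float m; std::memcpy(&m, &MBits, 4);                     // significand in [1, 2)
    float LogOfExponent = e * 0.69314718f;                   // exponent * ln(2)
    // degree-2 minimax polynomial for ln(m), same coefficients as the 6-bit case above
    float LogOfMantissa = (1.4034025f - 0.23903021f * m) * m - 1.1609546f;
    return LogOfExponent + LogOfMantissa;
  }

  // log6bit(8.0f) comes out close to 3 * ln 2 ~= 2.079, within the quoted 0.0034 error bound.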
+void +SelectionDAGLowering::visitLog2(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Get the exponent. + SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + // Different possible minimax approximations of significand in + // floating-point for various degrees of accuracy over [1,2]. + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; + // + // error 0.0049451742, which is more than 7 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbeb08fe0)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x40019463)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fd6633d)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log2ofMantissa = + // -2.51285454f + + // (4.07009056f + + // (-2.12067489f + + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; + // + // error 0.0000876136000, which is better than 13 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbda7262e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f25280b)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x4007b923)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40823e2f)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x4020d29c)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log2ofMantissa = + // -3.0400495f + + // (6.1129976f + + // (-5.3420409f + + // (3.2865683f + + // (-1.2669343f + + // (0.27515199f - + // 0.25691327e-1f * x) * x) * x) * x) * x) * x; + // + // error 0.0000018516, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbcd2769e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e8ce0b9)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fa22ae7)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40525723)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x40aaf200)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x40c39dad)); + SDValue t10 = 
DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4042902c)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG2, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitLog10(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Scale the exponent by log10(2) [0.30102999f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3e9a209a)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log10ofMantissa = + // -0.50419619f + + // (0.60948995f - 0.10380950f * x) * x; + // + // error 0.0014886165, which is 6 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbdd49a13)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f1c0789)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f011300)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log10ofMantissa = + // -0.64831180f + + // (0.91751397f + + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; + // + // error 0.00019228036, which is better than 12 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3d431f31)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ea21fb2)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f6ae232)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f25f7c3)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log10ofMantissa = + // -0.84299375f + + // (1.5327582f + + // (-1.0688956f + + // (0.49102474f + + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; + // + // error 0.0000037995730, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3c5d51ce)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e00685a)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3efb6798)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f88d192)); 
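visitLog2 and visitLog10 above follow the same shape as visitLog; only the scale applied to the exponent changes (no scale for log2, log10(2) for log10, which is what the 0x3e9a209a constant [0.30102999f] encodes) and the mantissa polynomial is refit for the new base. A quick scalar check of that scale relationship (illustrative):

  #include <cmath>
  #include <cstdio>

  int main() {
    float x = 12.0f;
    // log10(x) == log2(x) * log10(2); the lowering bakes this factor into the exponent term.
    std::printf("%f %f\n", std::log10(x), std::log2(x) * 0.30102999f);   // both ~1.079181
    return 0;
  }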
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fc4316c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3f57ce70)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG10, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitExp2(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is 
better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP2, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitPow - Lower a pow intrinsic. Handles the special sequences for +/// limited-precision mode with x == 10.0f. +void +SelectionDAGLowering::visitPow(CallInst &I) { + SDValue result; + Value *Val = I.getOperand(1); + DebugLoc dl = getCurDebugLoc(); + bool IsExp10 = false; + + if (getValue(Val).getValueType() == MVT::f32 && + getValue(I.getOperand(2)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { + APFloat Ten(10.0f); + IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten); + } + } + } + + if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(2)); + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OF10 3.3219281f + // IntegerPartOfX = (int32_t)(x * LOG2OF10); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x40549a78)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // twoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t6, 
IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FPOW, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2))); + } + + setValue(&I, result); +} + +/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If +/// we want to emit this as a call to a named external function, return the name +/// otherwise lower it and return null. +const char * +SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { + DebugLoc dl = getCurDebugLoc(); + switch (Intrinsic) { + default: + // By default, turn this into a target intrinsic node. 
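The special case in visitPow above only fires when the base is the literal constant 10.0f; it rewrites pow(10, x) as 2^(x * log2 10), with LOG2OF10 = 3.3219281f (0x40549a78), and then reuses the same 2^fraction polynomials as visitExp and visitExp2. A quick scalar check of the identity it relies on (illustrative):

  #include <cmath>
  #include <cstdio>

  int main() {
    float x = 1.5f;
    std::printf("%f %f\n", std::pow(10.0f, x), std::exp2(x * 3.3219281f));  // both ~31.6228
    return 0;
  }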
+ visitTargetIntrinsic(I, Intrinsic); + return 0; + case Intrinsic::vastart: visitVAStart(I); return 0; + case Intrinsic::vaend: visitVAEnd(I); return 0; + case Intrinsic::vacopy: visitVACopy(I); return 0; + case Intrinsic::returnaddress: + setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::frameaddress: + setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::setjmp: + return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + break; + case Intrinsic::longjmp: + return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + break; + case Intrinsic::memcpy: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + case Intrinsic::memset: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0)); + return 0; + } + case Intrinsic::memmove: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); + + // If the source and destination are known to not be aliases, we can + // lower memmove as memcpy. + uint64_t Size = -1ULL; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3)) + Size = C->getZExtValue(); + if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == + AliasAnalysis::NoAlias) { + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + + DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst &SPI = cast<DbgStopPointInst>(I); + if (DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLevel)) { + MachineFunction &MF = DAG.getMachineFunction(); + DICompileUnit CU(cast<GlobalVariable>(SPI.getContext())); + DebugLoc Loc = DebugLoc::get(MF.getOrCreateDebugLocID(CU.getGV(), + SPI.getLine(), SPI.getColumn())); + setCurDebugLoc(Loc); + + if (OptLevel == CodeGenOpt::None) + DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(), + SPI.getLine(), + SPI.getColumn(), + SPI.getContext())); + } + return 0; + } + case Intrinsic::dbg_region_start: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I); + + if (DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLevel) && + DW && DW->ShouldEmitDwarfDebug()) { + unsigned LabelID = + DW->RecordRegionStart(cast<GlobalVariable>(RSI.getContext())); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), LabelID)); + } + + return 0; + } + case Intrinsic::dbg_region_end: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I); + + if (DIDescriptor::ValidDebugInfo(REI.getContext(), OptLevel) && + DW && DW->ShouldEmitDwarfDebug()) { + MachineFunction &MF = DAG.getMachineFunction(); + DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext())); + + if (Subprogram.isNull() || 
Subprogram.describes(MF.getFunction())) { + unsigned LabelID = + DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext())); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), LabelID)); + } else { + // This is end of inlined function. Debugging information for inlined + // function is not handled yet (only supported by FastISel). + if (OptLevel == CodeGenOpt::None) { + unsigned ID = DW->RecordInlinedFnEnd(Subprogram); + if (ID != 0) + // Returned ID is 0 if this is unbalanced "end of inlined + // scope". This could happen if optimizer eats dbg intrinsics or + // "beginning of inlined scope" is not recoginized due to missing + // location info. In such cases, do ignore this region.end. + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), ID)); + } + } + } + + return 0; + } + case Intrinsic::dbg_func_start: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I); + Value *SP = FSI.getSubprogram(); + if (!DIDescriptor::ValidDebugInfo(SP, OptLevel)) + return 0; + + MachineFunction &MF = DAG.getMachineFunction(); + if (OptLevel == CodeGenOpt::None) { + // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what + // (most?) gdb expects. + DebugLoc PrevLoc = CurDebugLoc; + DISubprogram Subprogram(cast<GlobalVariable>(SP)); + DICompileUnit CompileUnit = Subprogram.getCompileUnit(); + + if (!Subprogram.describes(MF.getFunction())) { + // This is a beginning of an inlined function. + + // If llvm.dbg.func.start is seen in a new block before any + // llvm.dbg.stoppoint intrinsic then the location info is unknown. + // FIXME : Why DebugLoc is reset at the beginning of each block ? + if (PrevLoc.isUnknown()) + return 0; + + // Record the source line. + unsigned Line = Subprogram.getLineNumber(); + setCurDebugLoc(DebugLoc::get( + MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); + + if (DW && DW->ShouldEmitDwarfDebug()) { + DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); + unsigned LabelID = DW->RecordInlinedFnStart(Subprogram, + DICompileUnit(PrevLocTpl.CompileUnit), + PrevLocTpl.Line, + PrevLocTpl.Col); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), LabelID)); + } + } else { + // Record the source line. + unsigned Line = Subprogram.getLineNumber(); + MF.setDefaultDebugLoc(DebugLoc::get( + MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); + if (DW && DW->ShouldEmitDwarfDebug()) { + // llvm.dbg.func_start also defines beginning of function scope. + DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram())); + } + } + } else { + DISubprogram Subprogram(cast<GlobalVariable>(SP)); + + std::string SPName; + Subprogram.getLinkageName(SPName); + if (!SPName.empty() + && strcmp(SPName.c_str(), MF.getFunction()->getNameStart())) { + // This is beginning of inlined function. Debugging information for + // inlined function is not handled yet (only supported by FastISel). + return 0; + } + + // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is + // what (most?) gdb expects. + DICompileUnit CompileUnit = Subprogram.getCompileUnit(); + + // Record the source line but does not create a label for the normal + // function start. It will be emitted at asm emission time. However, + // create a label if this is a beginning of inlined function. 
+ unsigned Line = Subprogram.getLineNumber(); + setCurDebugLoc(DebugLoc::get( + MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); + // FIXME - Start new region because llvm.dbg.func_start also defines + // beginning of function scope. + } + + return 0; + } + case Intrinsic::dbg_declare: { + if (OptLevel == CodeGenOpt::None) { + DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + Value *Variable = DI.getVariable(); + if (DIDescriptor::ValidDebugInfo(Variable, OptLevel)) + DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(), + getValue(DI.getAddress()), getValue(Variable))); + } else { + // FIXME: Do something sensible here when we support debug declare. + } + return 0; + } + case Intrinsic::eh_exception: { + // Insert the EXCEPTIONADDR instruction. + assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!"); + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDValue Ops[1]; + Ops[0] = DAG.getRoot(); + SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); + setValue(&I, Op); + DAG.setRoot(Op.getValue(1)); + return 0; + } + + case Intrinsic::eh_selector_i32: + case Intrinsic::eh_selector_i64: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + MVT VT = (Intrinsic == Intrinsic::eh_selector_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + if (CurMBB->isLandingPad()) + AddCatchInfo(I, MMI, CurMBB); + else { +#ifndef NDEBUG + FuncInfo.CatchInfoLost.insert(&I); +#endif + // FIXME: Mark exception selector register as live in. Hack for PR1508. + unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) CurMBB->addLiveIn(Reg); + } + + // Insert the EHSELECTION instruction. + SDVTList VTs = DAG.getVTList(VT, MVT::Other); + SDValue Ops[2]; + Ops[0] = getValue(I.getOperand(1)); + Ops[1] = getRoot(); + SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); + setValue(&I, Op); + DAG.setRoot(Op.getValue(1)); + } else { + setValue(&I, DAG.getConstant(0, VT)); + } + + return 0; + } + + case Intrinsic::eh_typeid_for_i32: + case Intrinsic::eh_typeid_for_i64: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + MVT VT = (Intrinsic == Intrinsic::eh_typeid_for_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + // Find the type id for the given typeinfo. + GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); + + unsigned TypeID = MMI->getTypeIDFor(GV); + setValue(&I, DAG.getConstant(TypeID, VT)); + } else { + // Return something different to eh_selector. 
+ setValue(&I, DAG.getConstant(1, VT)); + } + + return 0; + } + + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { + MMI->setCallsEHReturn(true); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + MVT::Other, + getControlRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); + } else { + setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); + } + + return 0; + case Intrinsic::eh_unwind_init: + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { + MMI->setCallsUnwindInit(true); + } + + return 0; + + case Intrinsic::eh_dwarf_cfa: { + MVT VT = getValue(I.getOperand(1)).getValueType(); + SDValue CfaArg; + if (VT.bitsGT(TLI.getPointerTy())) + CfaArg = DAG.getNode(ISD::TRUNCATE, dl, + TLI.getPointerTy(), getValue(I.getOperand(1))); + else + CfaArg = DAG.getNode(ISD::SIGN_EXTEND, dl, + TLI.getPointerTy(), getValue(I.getOperand(1))); + + SDValue Offset = DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, + TLI.getPointerTy()), + CfaArg); + setValue(&I, DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAMEADDR, dl, + TLI.getPointerTy(), + DAG.getConstant(0, + TLI.getPointerTy())), + Offset)); + return 0; + } + + case Intrinsic::convertff: + case Intrinsic::convertfsi: + case Intrinsic::convertfui: + case Intrinsic::convertsif: + case Intrinsic::convertuif: + case Intrinsic::convertss: + case Intrinsic::convertsu: + case Intrinsic::convertus: + case Intrinsic::convertuu: { + ISD::CvtCode Code = ISD::CVT_INVALID; + switch (Intrinsic) { + case Intrinsic::convertff: Code = ISD::CVT_FF; break; + case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; + case Intrinsic::convertfui: Code = ISD::CVT_FU; break; + case Intrinsic::convertsif: Code = ISD::CVT_SF; break; + case Intrinsic::convertuif: Code = ISD::CVT_UF; break; + case Intrinsic::convertss: Code = ISD::CVT_SS; break; + case Intrinsic::convertsu: Code = ISD::CVT_SU; break; + case Intrinsic::convertus: Code = ISD::CVT_US; break; + case Intrinsic::convertuu: Code = ISD::CVT_UU; break; + } + MVT DestVT = TLI.getValueType(I.getType()); + Value* Op1 = I.getOperand(1); + setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), + DAG.getValueType(DestVT), + DAG.getValueType(getValue(Op1).getValueType()), + getValue(I.getOperand(2)), + getValue(I.getOperand(3)), + Code)); + return 0; + } + + case Intrinsic::sqrt: + setValue(&I, DAG.getNode(ISD::FSQRT, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::powi: + setValue(&I, DAG.getNode(ISD::FPOWI, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); + return 0; + case Intrinsic::sin: + setValue(&I, DAG.getNode(ISD::FSIN, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::cos: + setValue(&I, DAG.getNode(ISD::FCOS, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::log: + visitLog(I); + return 0; + case Intrinsic::log2: + visitLog2(I); + return 0; + case Intrinsic::log10: + visitLog10(I); + return 0; + case Intrinsic::exp: + visitExp(I); + return 0; + case Intrinsic::exp2: + visitExp2(I); + return 0; + case Intrinsic::pow: + visitPow(I); + return 0; + case Intrinsic::pcmarker: { + SDValue Tmp = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); + return 0; + } + case 
Intrinsic::readcyclecounter: { + SDValue Op = getRoot(); + SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl, + DAG.getVTList(MVT::i64, MVT::Other), + &Op, 1); + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + case Intrinsic::part_select: { + // Currently not implemented: just abort + assert(0 && "part_select intrinsic not implemented"); + abort(); + } + case Intrinsic::part_set: { + // Currently not implemented: just abort + assert(0 && "part_set intrinsic not implemented"); + abort(); + } + case Intrinsic::bswap: + setValue(&I, DAG.getNode(ISD::BSWAP, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::cttz: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::ctlz: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::ctpop: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::stacksave: { + SDValue Op = getRoot(); + SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + case Intrinsic::stackrestore: { + SDValue Tmp = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Tmp)); + return 0; + } + case Intrinsic::stackprotector: { + // Emit code into the DAG to store the stack guard onto the stack. + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MVT PtrTy = TLI.getPointerTy(); + + SDValue Src = getValue(I.getOperand(1)); // The guard's value. + AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + + int FI = FuncInfo.StaticAllocaMap[Slot]; + MFI->setStackProtectorIndex(FI); + + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + + // Store the stack protector onto the stack. 
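// The store built just below places the guard value into the dedicated stack
// slot recorded via setStackProtectorIndex; the function epilogue later
// compares that slot against the guard and traps on mismatch. A standalone
// sketch of that protocol (the guard constant and the fail handler are
// illustrative only; real targets load the guard from TLS or libc):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

static uint64_t __guard = 0x595e9fbd94fda766ull;   // illustrative canary value

static void protector_fail() {
  std::fprintf(stderr, "stack smashing detected\n");
  std::abort();
}

static int protected_fn(int x) {
  uint64_t slot = __guard;          // prologue: store guard into the stack slot
  int local = x * 2;                // ... body that might overflow 'local' ...
  if (slot != __guard)              // epilogue: compare slot against the guard
    protector_fail();
  return local;
}

int main() { std::printf("%d\n", protected_fn(21)); return 0; }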
+ SDValue Result = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, + PseudoSourceValue::getFixedStack(FI), + 0, true); + setValue(&I, Result); + DAG.setRoot(Result); + return 0; + } + case Intrinsic::var_annotation: + // Discard annotate attributes + return 0; + + case Intrinsic::init_trampoline: { + const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts()); + + SDValue Ops[6]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getOperand(1)); + Ops[2] = getValue(I.getOperand(2)); + Ops[3] = getValue(I.getOperand(3)); + Ops[4] = DAG.getSrcValue(I.getOperand(1)); + Ops[5] = DAG.getSrcValue(F); + + SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), + Ops, 6); + + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + + case Intrinsic::gcroot: + if (GFI) { + Value *Alloca = I.getOperand(1); + Constant *TypeMap = cast<Constant>(I.getOperand(2)); + + FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); + GFI->addStackRoot(FI->getIndex(), TypeMap); + } + return 0; + + case Intrinsic::gcread: + case Intrinsic::gcwrite: + assert(0 && "GC failed to lower gcread/gcwrite intrinsics!"); + return 0; + + case Intrinsic::flt_rounds: { + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); + return 0; + } + + case Intrinsic::trap: { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + + case Intrinsic::uadd_with_overflow: + return implVisitAluOverflow(I, ISD::UADDO); + case Intrinsic::sadd_with_overflow: + return implVisitAluOverflow(I, ISD::SADDO); + case Intrinsic::usub_with_overflow: + return implVisitAluOverflow(I, ISD::USUBO); + case Intrinsic::ssub_with_overflow: + return implVisitAluOverflow(I, ISD::SSUBO); + case Intrinsic::umul_with_overflow: + return implVisitAluOverflow(I, ISD::UMULO); + case Intrinsic::smul_with_overflow: + return implVisitAluOverflow(I, ISD::SMULO); + + case Intrinsic::prefetch: { + SDValue Ops[4]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getOperand(1)); + Ops[2] = getValue(I.getOperand(2)); + Ops[3] = getValue(I.getOperand(3)); + DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); + return 0; + } + + case Intrinsic::memory_barrier: { + SDValue Ops[6]; + Ops[0] = getRoot(); + for (int x = 1; x < 6; ++x) + Ops[x] = getValue(I.getOperand(x)); + + DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); + return 0; + } + case Intrinsic::atomic_cmp_swap: { + SDValue Root = getRoot(); + SDValue L = + DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), + getValue(I.getOperand(2)).getValueType().getSimpleVT(), + Root, + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + getValue(I.getOperand(3)), + I.getOperand(1)); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; + } + case Intrinsic::atomic_load_add: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD); + case Intrinsic::atomic_load_sub: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB); + case Intrinsic::atomic_load_or: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR); + case Intrinsic::atomic_load_xor: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR); + case Intrinsic::atomic_load_and: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND); + case Intrinsic::atomic_load_nand: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND); + case Intrinsic::atomic_load_max: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX); + case Intrinsic::atomic_load_min: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN); + case 
Intrinsic::atomic_load_umin: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN); + case Intrinsic::atomic_load_umax: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); + case Intrinsic::atomic_swap: + return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); + } +} + + +void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, + bool IsTailCall, + MachineBasicBlock *LandingPad) { + const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + unsigned BeginLabel = 0, EndLabel = 0; + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); + for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + SDValue ArgNode = getValue(*i); + Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); + + unsigned attrInd = i - CS.arg_begin() + 1; + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.Alignment = CS.getParamAlignment(attrInd); + Args.push_back(Entry); + } + + if (LandingPad && MMI) { + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. + BeginLabel = MMI->NextLabelID(); + // Both PendingLoads and PendingExports must be flushed here; + // this call might not return. + (void)getRoot(); + DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(), + getControlRoot(), BeginLabel)); + } + + std::pair<SDValue,SDValue> Result = + TLI.LowerCallTo(getRoot(), CS.getType(), + CS.paramHasAttr(0, Attribute::SExt), + CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), + CS.paramHasAttr(0, Attribute::InReg), + CS.getCallingConv(), + IsTailCall && PerformTailCallOpt, + Callee, Args, DAG, getCurDebugLoc()); + if (CS.getType() != Type::VoidTy) + setValue(CS.getInstruction(), Result.first); + DAG.setRoot(Result.second); + + if (LandingPad && MMI) { + // Insert a label at the end of the invoke call to mark the try range. This + // can be used to detect deletion of the invoke via the MachineModuleInfo. + EndLabel = MMI->NextLabelID(); + DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(), + getRoot(), EndLabel)); + + // Inform MachineModuleInfo of range. + MMI->addInvoke(LandingPad, BeginLabel, EndLabel); + } +} + + +void SelectionDAGLowering::visitCall(CallInst &I) { + const char *RenameFn = 0; + if (Function *F = I.getCalledFunction()) { + if (F->isDeclaration()) { + const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo(); + if (II) { + if (unsigned IID = II->getIntrinsicID(F)) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + if (unsigned IID = F->getIntrinsicID()) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + + // Check for well-known libc/libm calls. If the function is internal, it + // can't be a library call. 
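// The name checks below map a handful of well-known libm calls straight to FP
// DAG nodes (copysign -> FCOPYSIGN, fabs -> FABS, sin -> FSIN, cos -> FCOS)
// when the operand count and types line up. A minimal standalone sketch of
// the same lookup, table-driven instead of length-plus-strcmp chains; the
// table and helper are illustrative, not part of this file:

#include <cstring>

struct LibmMapping { const char *Name; const char *Node; unsigned NumArgs; };

static const LibmMapping LibmTable[] = {
  {"copysign", "FCOPYSIGN", 2}, {"copysignf", "FCOPYSIGN", 2},
  {"fabs", "FABS", 1}, {"fabsf", "FABS", 1}, {"fabsl", "FABS", 1},
  {"sin",  "FSIN", 1}, {"sinf",  "FSIN", 1}, {"sinl",  "FSIN", 1},
  {"cos",  "FCOS", 1}, {"cosf",  "FCOS", 1}, {"cosl",  "FCOS", 1},
};

// Returns the node name to lower to, or null if the call is not recognized.
// NumArgs counts call arguments only (the callee operand is excluded).
static const char *matchLibmCall(const char *Name, unsigned NumArgs) {
  for (unsigned i = 0; i < sizeof(LibmTable)/sizeof(LibmTable[0]); ++i)
    if (LibmTable[i].NumArgs == NumArgs && !strcmp(LibmTable[i].Name, Name))
      return LibmTable[i].Node;
  return 0;
}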
+ unsigned NameLen = F->getNameLen(); + if (!F->hasLocalLinkage() && NameLen) { + const char *NameStr = F->getNameStart(); + if (NameStr[0] == 'c' && + ((NameLen == 8 && !strcmp(NameStr, "copysign")) || + (NameLen == 9 && !strcmp(NameStr, "copysignf")))) { + if (I.getNumOperands() == 3 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType() && + I.getType() == I.getOperand(2)->getType()) { + SDValue LHS = getValue(I.getOperand(1)); + SDValue RHS = getValue(I.getOperand(2)); + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), + LHS.getValueType(), LHS, RHS)); + return; + } + } else if (NameStr[0] == 'f' && + ((NameLen == 4 && !strcmp(NameStr, "fabs")) || + (NameLen == 5 && !strcmp(NameStr, "fabsf")) || + (NameLen == 5 && !strcmp(NameStr, "fabsl")))) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if (NameStr[0] == 's' && + ((NameLen == 3 && !strcmp(NameStr, "sin")) || + (NameLen == 4 && !strcmp(NameStr, "sinf")) || + (NameLen == 4 && !strcmp(NameStr, "sinl")))) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if (NameStr[0] == 'c' && + ((NameLen == 3 && !strcmp(NameStr, "cos")) || + (NameLen == 4 && !strcmp(NameStr, "cosf")) || + (NameLen == 4 && !strcmp(NameStr, "cosl")))) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } + } + } else if (isa<InlineAsm>(I.getOperand(0))) { + visitInlineAsm(&I); + return; + } + + SDValue Callee; + if (!RenameFn) + Callee = getValue(I.getOperand(0)); + else + Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + + LowerCallTo(&I, Callee, I.isTailCall()); +} + + +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, + SDValue *Flag) const { + // Assemble the legal parts into the final values. + SmallVector<SDValue, 4> Values(ValueVTs.size()); + SmallVector<SDValue, 8> Parts; + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + // Copy the legal parts from the registers. + MVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(ValueVT); + MVT RegisterVT = RegVTs[Value]; + + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } + Chain = P.getValue(1); + + // If the source register was virtual and if we know something about it, + // add an assert node. 
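// The block below attaches AssertSext/AssertZext to values copied out of
// virtual registers when sign/zero-bit information is known. A standalone
// sketch of the width-selection rule it applies (pure function, illustrative
// only): pick the narrowest width the known bits prove the value was
// extended from, preferring sign extension.

#include <cassert>
#include <cstdio>

// Returns the asserted source width in bits (1, 8, 16 or 32) and sets IsSExt,
// or returns 0 when nothing can be asserted.
static unsigned pickAssertWidth(unsigned RegSize, unsigned NumSignBits,
                                unsigned NumZeroBits, bool &IsSExt) {
  if (NumSignBits == RegSize)      { IsSExt = true;  return 1;  }
  if (NumZeroBits >= RegSize - 1)  { IsSExt = false; return 1;  }
  if (NumSignBits > RegSize - 8)   { IsSExt = true;  return 8;  }
  if (NumZeroBits >= RegSize - 8)  { IsSExt = false; return 8;  }
  if (NumSignBits > RegSize - 16)  { IsSExt = true;  return 16; }
  if (NumZeroBits >= RegSize - 16) { IsSExt = false; return 16; }
  if (NumSignBits > RegSize - 32)  { IsSExt = true;  return 32; }
  if (NumZeroBits >= RegSize - 32) { IsSExt = false; return 32; }
  return 0;
}

int main() {
  bool S;
  // A 32-bit register known to have 25 sign bits: assert sext from i8.
  unsigned W = pickAssertWidth(32, 25, 0, S);
  assert(W == 8 && S);
  std::printf("AssertSext from i%u\n", W);
  return 0;
}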
+ if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && + RegisterVT.isInteger() && !RegisterVT.isVector()) { + unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; + FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + if (FLI.LiveOutRegInfo.size() > SlotNo) { + FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + MVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + + if (FromVT != MVT::Other) { + P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + + } + } + } + + Parts[i] = P; + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + // Get the list of the values's legal parts. + unsigned NumRegs = Regs.size(); + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI->getNumRegisters(ValueVT); + MVT RegisterVT = RegVTs[Value]; + + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), + &Parts[Part], NumParts, RegisterVT); + Part += NumParts; + } + + // Copy the parts into the registers. + SmallVector<SDValue, 8> Chains(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue Part; + if (Flag == 0) + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); + else { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); + *Flag = Part.getValue(1); + } + Chains[i] = Part.getValue(0); + } + + if (NumRegs == 1 || Flag) + // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is + // flagged to it. That is the CopyToReg nodes and the user are considered + // a single scheduling unit. If we create a TokenFactor and return it as + // chain, then the TokenFactor is both a predecessor (operand) of the + // user as well as a successor (the TF operands are flagged to the user). 
+ // c1, f1 = CopyToReg + // c2, f2 = CopyToReg + // c3 = TokenFactor c1, c2 + // ... + // = op c3, ..., f2 + Chain = Chains[NumRegs-1]; + else + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); +} + +/// AddInlineAsmOperands - Add this value to the specified inlineasm node +/// operand list. This adds the code marker and includes the number of +/// values added into it. +void RegsForValue::AddInlineAsmOperands(unsigned Code, + bool HasMatching,unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const { + MVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy(); + assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); + unsigned Flag = Code | (Regs.size() << 3); + if (HasMatching) + Flag |= 0x80000000 | (MatchingIdx << 16); + Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy)); + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { + unsigned NumRegs = TLI->getNumRegisters(ValueVTs[Value]); + MVT RegisterVT = RegVTs[Value]; + for (unsigned i = 0; i != NumRegs; ++i) { + assert(Reg < Regs.size() && "Mismatch in # registers expected"); + Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + } + } +} + +/// isAllocatableRegister - If the specified register is safe to allocate, +/// i.e. it isn't a stack pointer or some other special register, return the +/// register class for the register. Otherwise, return null. +static const TargetRegisterClass * +isAllocatableRegister(unsigned Reg, MachineFunction &MF, + const TargetLowering &TLI, + const TargetRegisterInfo *TRI) { + MVT FoundVT = MVT::Other; + const TargetRegisterClass *FoundRC = 0; + for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), + E = TRI->regclass_end(); RCI != E; ++RCI) { + MVT ThisVT = MVT::Other; + + const TargetRegisterClass *RC = *RCI; + // If none of the the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (TLI.isTypeLegal(*I)) { + // If we have already found this register in a different register class, + // choose the one with the largest VT specified. For example, on + // PowerPC, we favor f64 register classes over f32. + if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { + ThisVT = *I; + break; + } + } + } + + if (ThisVT == MVT::Other) continue; + + // NOTE: This isn't ideal. In particular, this might allocate the + // frame pointer in functions that need it (due to them not being taken + // out of allocation, because a variable sized allocation hasn't been seen + // yet). This is a slight code pessimization, but should still work. + for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), + E = RC->allocation_order_end(MF); I != E; ++I) + if (*I == Reg) { + // We found a matching register class. Keep looking at others in case + // we find one with larger registers that this physreg is also in. + FoundRC = RC; + FoundVT = ThisVT; + break; + } + } + return FoundRC; +} + + +namespace llvm { +/// AsmOperandInfo - This contains information for each constraint that we are +/// lowering. +class VISIBILITY_HIDDEN SDISelAsmOperandInfo : + public TargetLowering::AsmOperandInfo { +public: + /// CallOperand - If this is the result output operand or a clobber + /// this is null, otherwise it is the incoming operand to the CallInst. + /// This gets modified as the asm is processed. 
+ SDValue CallOperand; + + /// AssignedRegs - If this is a register or register class operand, this + /// contains the set of register corresponding to the operand. + RegsForValue AssignedRegs; + + explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info) + : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { + } + + /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers + /// busy in OutputRegs/InputRegs. + void MarkAllocatedRegs(bool isOutReg, bool isInReg, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs, + const TargetRegisterInfo &TRI) const { + if (isOutReg) { + for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) + MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI); + } + if (isInReg) { + for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) + MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI); + } + } + + /// getCallOperandValMVT - Return the MVT of the Value* that this operand + /// corresponds to. If there is no Value* for this operand, it returns + /// MVT::Other. + MVT getCallOperandValMVT(const TargetLowering &TLI, + const TargetData *TD) const { + if (CallOperandVal == 0) return MVT::Other; + + if (isa<BasicBlock>(CallOperandVal)) + return TLI.getPointerTy(); + + const llvm::Type *OpTy = CallOperandVal->getType(); + + // If this is an indirect operand, the operand is a pointer to the + // accessed type. + if (isIndirect) + OpTy = cast<PointerType>(OpTy)->getElementType(); + + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. + if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = TD->getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpTy = IntegerType::get(BitSize); + break; + } + } + + return TLI.getValueType(OpTy, true); + } + +private: + /// MarkRegAndAliases - Mark the specified register and all aliases in the + /// specified set. + static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs, + const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg"); + Regs.insert(Reg); + if (const unsigned *Aliases = TRI.getAliasSet(Reg)) + for (; *Aliases; ++Aliases) + Regs.insert(*Aliases); + } +}; +} // end llvm namespace. + + +/// GetRegistersForValue - Assign registers (virtual or physical) for the +/// specified operand. We prefer to assign virtual registers, to allow the +/// register allocator handle the assignment process. However, if the asm uses +/// features that we can't model on machineinstrs, we have SDISel do the +/// allocation. This produces generally horrible, but correct, code. +/// +/// OpInfo describes the operand. +/// Input and OutputRegs are the set of already allocated physical registers. +/// +void SelectionDAGLowering:: +GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs) { + // Compute whether this value requires an input register, an output register, + // or both. + bool isOutReg = false; + bool isInReg = false; + switch (OpInfo.Type) { + case InlineAsm::isOutput: + isOutReg = true; + + // If there is an input constraint that matches this, we need to reserve + // the input register so no other inputs allocate to it. 
+ isInReg = OpInfo.hasMatchingInput(); + break; + case InlineAsm::isInput: + isInReg = true; + isOutReg = false; + break; + case InlineAsm::isClobber: + isOutReg = true; + isInReg = true; + break; + } + + + MachineFunction &MF = DAG.getMachineFunction(); + SmallVector<unsigned, 4> Regs; + + // If this is a constraint for a single physreg, or a constraint for a + // register class, find it. + std::pair<unsigned, const TargetRegisterClass*> PhysReg = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + + unsigned NumRegs = 1; + if (OpInfo.ConstraintVT != MVT::Other) { + // If this is a FP input in an integer register (or visa versa) insert a bit + // cast of the input value. More generally, handle any case where the input + // value disagrees with the register class we plan to stick this in. + if (OpInfo.Type == InlineAsm::isInput && + PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { + // Try to convert to the first MVT that the reg class contains. If the + // types are identical size, use a bitcast to convert (e.g. two differing + // vector types). + MVT RegVT = *PhysReg.second->vt_begin(); + if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { + OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + RegVT, OpInfo.CallOperand); + OpInfo.ConstraintVT = RegVT; + } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { + // If the input is a FP value and we want it in FP registers, do a + // bitcast to the corresponding integer type. This turns an f64 value + // into i64, which can be passed with two i32 values on a 32-bit + // machine. + RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); + OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + RegVT, OpInfo.CallOperand); + OpInfo.ConstraintVT = RegVT; + } + } + + NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT); + } + + MVT RegVT; + MVT ValueVT = OpInfo.ConstraintVT; + + // If this is a constraint for a specific physical register, like {r17}, + // assign it now. + if (unsigned AssignedReg = PhysReg.first) { + const TargetRegisterClass *RC = PhysReg.second; + if (OpInfo.ConstraintVT == MVT::Other) + ValueVT = *RC->vt_begin(); + + // Get the actual register value type. This is important, because the user + // may have asked for (e.g.) the AX register in i32 type. We need to + // remember that AX is actually i16 to get the right extension. + RegVT = *RC->vt_begin(); + + // This is a explicit reference to a physical register. + Regs.push_back(AssignedReg); + + // If this is an expanded reference, add the rest of the regs to Regs. + if (NumRegs != 1) { + TargetRegisterClass::iterator I = RC->begin(); + for (; *I != AssignedReg; ++I) + assert(I != RC->end() && "Didn't find reg!"); + + // Already added the first reg. + --NumRegs; ++I; + for (; NumRegs; --NumRegs, ++I) { + assert(I != RC->end() && "Ran out of registers to allocate!"); + Regs.push_back(*I); + } + } + OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); + return; + } + + // Otherwise, if this was a reference to an LLVM register class, create vregs + // for this reference. + if (const TargetRegisterClass *RC = PhysReg.second) { + RegVT = *RC->vt_begin(); + if (OpInfo.ConstraintVT == MVT::Other) + ValueVT = RegVT; + + // Create the appropriate number of virtual registers. 
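// The bitcast applied earlier in this function ("turns an f64 value into i64,
// which can be passed with two i32 values on a 32-bit machine") only
// reinterprets bits; it never converts the number. A standalone host-side
// illustration of that reinterpretation and split (little-endian lo/hi naming
// is purely illustrative):

#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
  double d = 3.141592653589793;
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);    // the "bitcast": same 64 bits, new type
  uint32_t lo = uint32_t(bits);           // first i32 part of the i64
  uint32_t hi = uint32_t(bits >> 32);     // second i32 part of the i64
  std::printf("f64 %.15g -> i64 0x%016llx -> i32 parts 0x%08x, 0x%08x\n",
              d, (unsigned long long)bits, hi, lo);
  return 0;
}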
+ MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (; NumRegs; --NumRegs) + Regs.push_back(RegInfo.createVirtualRegister(RC)); + + OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + return; + } + + // This is a reference to a register class that doesn't directly correspond + // to an LLVM register class. Allocate NumRegs consecutive, available, + // registers from the class. + std::vector<unsigned> RegClassRegs + = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + unsigned NumAllocated = 0; + for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { + unsigned Reg = RegClassRegs[i]; + // See if this register is available. + if ((isOutReg && OutputRegs.count(Reg)) || // Already used. + (isInReg && InputRegs.count(Reg))) { // Already used. + // Make sure we find consecutive registers. + NumAllocated = 0; + continue; + } + + // Check to see if this register is allocatable (i.e. don't give out the + // stack pointer). + const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI); + if (!RC) { // Couldn't allocate this register. + // Reset NumAllocated to make sure we return consecutive registers. + NumAllocated = 0; + continue; + } + + // Okay, this register is good, we can use it. + ++NumAllocated; + + // If we allocated enough consecutive registers, succeed. + if (NumAllocated == NumRegs) { + unsigned RegStart = (i-NumAllocated)+1; + unsigned RegEnd = i+1; + // Mark all of the allocated registers used. + for (unsigned i = RegStart; i != RegEnd; ++i) + Regs.push_back(RegClassRegs[i]); + + OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(), + OpInfo.ConstraintVT); + OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); + return; + } + } + + // Otherwise, we couldn't allocate enough registers for this. +} + +/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being +/// processed uses a memory 'm' constraint. +static bool +hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, + const TargetLowering &TLI) { + for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { + InlineAsm::ConstraintInfo &CI = CInfos[i]; + for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); + if (CType == TargetLowering::C_Memory) + return true; + } + + // Indirect operand accesses access memory. + if (CI.isIndirect) + return true; + } + + return false; +} + +/// visitInlineAsm - Handle a call to an InlineAsm object. +/// +void SelectionDAGLowering::visitInlineAsm(CallSite CS) { + InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + + /// ConstraintOperands - Information about all of the constraints. + std::vector<SDISelAsmOperandInfo> ConstraintOperands; + + std::set<unsigned> OutputRegs, InputRegs; + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + std::vector<InlineAsm::ConstraintInfo> + ConstraintInfos = IA->ParseConstraints(); + + bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); + + SDValue Chain, Flag; + + // We won't need to flush pending loads if this asm doesn't touch + // memory and is nonvolatile. + if (hasMemory || IA->hasSideEffects()) + Chain = getRoot(); + else + Chain = DAG.getRoot(); + + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. 
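// A standalone sketch of the consecutive-register scan performed in
// GetRegistersForValue above for constraints that name a raw register list
// rather than a register class: walk the candidates in order, restart the run
// whenever a register is already used (or not allocatable), and succeed once
// NumRegs candidates in a row are free. Register numbers and the "used" set
// below are illustrative.

#include <set>
#include <vector>
#include <cstdio>

// Returns the starting index of a run of NumRegs consecutive free registers,
// or -1 if no such run exists.
static int findConsecutiveFree(const std::vector<unsigned> &Candidates,
                               const std::set<unsigned> &Used,
                               unsigned NumRegs) {
  unsigned NumAllocated = 0;
  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    if (Used.count(Candidates[i])) { NumAllocated = 0; continue; }
    if (++NumAllocated == NumRegs)
      return int(i - NumAllocated + 1);
  }
  return -1;
}

int main() {
  std::vector<unsigned> Regs;
  for (unsigned r = 10; r <= 14; ++r) Regs.push_back(r);
  std::set<unsigned> Used;
  Used.insert(11);                                        // reg 11 already taken
  int Start = findConsecutiveFree(Regs, Used, 2);
  std::printf("run of 2 starts at index %d\n", Start);    // prints 2 (regs 12,13)
  return 0;
}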
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); + SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); + + MVT OpVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(CS.getType() != Type::VoidTy && "Bad inline asm!"); + if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpVT = TLI.getValueType(STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpVT = TLI.getValueType(CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + // If this is an input or an indirect output, process the call argument. + // BasicBlocks are labels, currently appearing only in asm's. + if (OpInfo.CallOperandVal) { + if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); + } else { + OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); + } + + OpVT = OpInfo.getCallOperandValMVT(TLI, TD); + } + + OpInfo.ConstraintVT = OpVT; + } + + // Second pass over the constraints: compute which constraint option to use + // and assign registers to constraints that want a specific physreg. + for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + // If this is an output operand with a matching input operand, look up the + // matching input. If their types mismatch, e.g. one is an integer, the + // other is floating point, or their sizes are different, flag it as an + // error. + if (OpInfo.hasMatchingInput()) { + SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (OpInfo.ConstraintVT.getSizeInBits() != + Input.ConstraintVT.getSizeInBits())) { + cerr << "llvm: error: Unsupported asm: input constraint with a " + << "matching output constraint of incompatible type!\n"; + exit(1); + } + Input.ConstraintVT = OpInfo.ConstraintVT; + } + } + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG); + + // If this is a memory input, and if the operand is not indirect, do what we + // need to to provide an address for the memory input. + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + !OpInfo.isIndirect) { + assert(OpInfo.Type == InlineAsm::isInput && + "Can only indirectify direct input operands!"); + + // Memory operands really want the address of the value. If we don't have + // an indirect input, put it in the constpool if we can, otherwise spill + // it to a stack slot. + + // If the operand is a float, integer, or vector constant, spill to a + // constant pool entry to get its address. 
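// The matching-constraint check earlier in this pass only tolerates a type
// difference between a tied output and input when both sides are the same
// kind (integer vs floating point) and the same bit width; otherwise lowering
// reports an unsupported asm. A standalone sketch of that compatibility rule
// (TiedVT is an illustrative stand-in for the constraint value types):

struct TiedVT { bool IsInteger; unsigned SizeInBits; };

static bool tiedConstraintsCompatible(TiedVT Out, TiedVT In) {
  // Compatible tied operands may still differ in exact type (the input is
  // then rewritten to the output's type), but never in kind or width.
  return Out.IsInteger == In.IsInteger && Out.SizeInBits == In.SizeInBits;
}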
+ Value *OpVal = OpInfo.CallOperandVal; + if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || + isa<ConstantVector>(OpVal)) { + OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), + TLI.getPointerTy()); + } else { + // Otherwise, create a stack slot and emit a store to it before the + // asm. + const Type *Ty = OpVal->getType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + Chain = DAG.getStore(Chain, getCurDebugLoc(), + OpInfo.CallOperand, StackSlot, NULL, 0); + OpInfo.CallOperand = StackSlot; + } + + // There is no longer a Value* corresponding to this operand. + OpInfo.CallOperandVal = 0; + // It is now an indirect operand. + OpInfo.isIndirect = true; + } + + // If this constraint is for a specific register, allocate it before + // anything else. + if (OpInfo.ConstraintType == TargetLowering::C_Register) + GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + } + ConstraintInfos.clear(); + + + // Second pass - Loop over all of the operands, assigning virtual or physregs + // to register class operands. + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + // C_Register operands have already been allocated, Other/Memory don't need + // to be. + if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) + GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + } + + // AsmNodeOperands - The operands for the ISD::INLINEASM node. + std::vector<SDValue> AsmNodeOperands; + AsmNodeOperands.push_back(SDValue()); // reserve space for input chain + AsmNodeOperands.push_back( + DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other)); + + + // Loop over all of the inputs, copying the operand values into the + // appropriate registers and processing the output regs. + RegsForValue RetValRegs; + + // IndirectStoresToEmit - The set of stores to emit after the inline asm node. + std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; + + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + switch (OpInfo.Type) { + case InlineAsm::isOutput: { + if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && + OpInfo.ConstraintType != TargetLowering::C_Register) { + // Memory output, or 'other' output (e.g. 'X' constraint). + assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + + // Add information to the INLINEASM node to know about this output. + unsigned ResOpType = 4/*MEM*/ | (1<<3); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + TLI.getPointerTy())); + AsmNodeOperands.push_back(OpInfo.CallOperand); + break; + } + + // Otherwise, this is a register or register class output. + + // Copy the output from the appropriate register. Find a register that + // we can use. + if (OpInfo.AssignedRegs.Regs.empty()) { + cerr << "llvm: error: Couldn't allocate output reg for constraint '" + << OpInfo.ConstraintCode << "'!\n"; + exit(1); + } + + // If this is an indirect operand, store through the pointer after the + // asm. + if (OpInfo.isIndirect) { + IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, + OpInfo.CallOperandVal)); + } else { + // This is the result value of the call. 
+ assert(CS.getType() != Type::VoidTy && "Bad inline asm!"); + // Concatenate this output onto the outputs list. + RetValRegs.append(OpInfo.AssignedRegs); + } + + // Add information to the INLINEASM node to know that this register is + // set. + OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? + 6 /* EARLYCLOBBER REGDEF */ : + 2 /* REGDEF */ , + false, + 0, + DAG, AsmNodeOperands); + break; + } + case InlineAsm::isInput: { + SDValue InOperandVal = OpInfo.CallOperand; + + if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? + // If this is required to match an output register we have already set, + // just use its register. + unsigned OperandNo = OpInfo.getMatchedOperand(); + + // Scan until we find the definition we already emitted of this operand. + // When we find it, create a RegsForValue operand. + unsigned CurOp = 2; // The first operand. + for (; OperandNo; --OperandNo) { + // Advance to the next operand. + unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + assert(((OpFlag & 7) == 2 /*REGDEF*/ || + (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ || + (OpFlag & 7) == 4 /*MEM*/) && + "Skipped past definitions?"); + CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; + } + + unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + if ((OpFlag & 7) == 2 /*REGDEF*/ + || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { + // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. + assert(!OpInfo.isIndirect && + "Don't know how to handle tied indirect register inputs yet!"); + RegsForValue MatchedRegs; + MatchedRegs.TLI = &TLI; + MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); + MVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); + MatchedRegs.RegVTs.push_back(RegVT); + MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); + for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); + i != e; ++i) + MatchedRegs.Regs. + push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT))); + + // Use the produced MatchedRegs object to + MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + Chain, &Flag); + MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, + true, OpInfo.getMatchedOperand(), + DAG, AsmNodeOperands); + break; + } else { + assert(((OpFlag & 7) == 4) && "Unknown matching constraint!"); + assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 && + "Unexpected number of operands"); + // Add information to the INLINEASM node to know about this input. + // See InlineAsm.h isUseOperandTiedToDef. + OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + TLI.getPointerTy())); + AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); + break; + } + } + + if (OpInfo.ConstraintType == TargetLowering::C_Other) { + assert(!OpInfo.isIndirect && + "Don't know how to handle indirect other inputs yet!"); + + std::vector<SDValue> Ops; + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], + hasMemory, Ops, DAG); + if (Ops.empty()) { + cerr << "llvm: error: Invalid operand for inline asm constraint '" + << OpInfo.ConstraintCode << "'!\n"; + exit(1); + } + + // Add information to the INLINEASM node to know about this input. 
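// The operand-flag words pushed onto the INLINEASM node, here and in
// AddInlineAsmOperands earlier in this file, pack the operand kind into the
// low 3 bits, the register/value count into bits 3..15, and, for operands
// tied to an earlier definition, bit 31 plus the matched operand number in
// bits 16 and up (hence the (OpFlag & 0xffff) >> 3 decode above). A
// standalone encode/decode sketch of that layout:

#include <cassert>
#include <cstdio>

static unsigned encodeAsmFlag(unsigned Kind, unsigned NumRegs,
                              bool HasMatch, unsigned MatchIdx) {
  assert(Kind < 8 && NumRegs < (1u << 13) && "fields out of range");
  unsigned Flag = Kind | (NumRegs << 3);
  if (HasMatch)
    Flag |= 0x80000000u | (MatchIdx << 16);
  return Flag;
}

int main() {
  unsigned Flag = encodeAsmFlag(2 /*REGDEF*/, 1, /*HasMatch=*/false, 0);
  unsigned Kind    = Flag & 7;              // 2: register definition
  unsigned NumRegs = (Flag & 0xffff) >> 3;  // 1 register
  std::printf("kind=%u numregs=%u tied=%s\n",
              Kind, NumRegs, (Flag & 0x80000000u) ? "yes" : "no");
  return 0;
}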
+ unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + TLI.getPointerTy())); + AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); + break; + } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); + assert(InOperandVal.getValueType() == TLI.getPointerTy() && + "Memory operands expect pointer values"); + + // Add information to the INLINEASM node to know about this input. + unsigned ResOpType = 4/*MEM*/ | (1<<3); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + TLI.getPointerTy())); + AsmNodeOperands.push_back(InOperandVal); + break; + } + + assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || + OpInfo.ConstraintType == TargetLowering::C_Register) && + "Unknown constraint type!"); + assert(!OpInfo.isIndirect && + "Don't know how to handle indirect register inputs yet!"); + + // Copy the input into the appropriate registers. + if (OpInfo.AssignedRegs.Regs.empty()) { + cerr << "llvm: error: Couldn't allocate output reg for constraint '" + << OpInfo.ConstraintCode << "'!\n"; + exit(1); + } + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + Chain, &Flag); + + OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0, + DAG, AsmNodeOperands); + break; + } + case InlineAsm::isClobber: { + // Add the clobbered value to the operand list, so that the register + // allocator is aware that the physreg got clobbered. + if (!OpInfo.AssignedRegs.Regs.empty()) + OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */, + false, 0, DAG,AsmNodeOperands); + break; + } + } + } + + // Finish up input operands. + AsmNodeOperands[0] = Chain; + if (Flag.getNode()) AsmNodeOperands.push_back(Flag); + + Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), + DAG.getVTList(MVT::Other, MVT::Flag), + &AsmNodeOperands[0], AsmNodeOperands.size()); + Flag = Chain.getValue(1); + + // If this asm returns a register value, copy the result from that register + // and set it as the value of the call. + if (!RetValRegs.Regs.empty()) { + SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + Chain, &Flag); + + // FIXME: Why don't we do this for inline asms with MRVs? + if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { + MVT ResultType = TLI.getValueType(CS.getType()); + + // If any of the results of the inline asm is a vector, it may have the + // wrong width/num elts. This can happen for register classes that can + // contain multiple different value types. The preg or vreg allocated may + // not have the same VT as was expected. Convert it to the right type + // with bit_convert. + if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { + Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + ResultType, Val); + + } else if (ResultType != Val.getValueType() && + ResultType.isInteger() && Val.getValueType().isInteger()) { + // If a result value was tied to an input value, the computed result may + // have a wider width than the expected result. Extract the relevant + // portion. + Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val); + } + + assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); + } + + setValue(CS.getInstruction(), Val); + // Don't need to use this as a chain in this case. 
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) + return; + } + + std::vector<std::pair<SDValue, Value*> > StoresToEmit; + + // Process indirect outputs, first output all of the flagged copies out of + // physregs. + for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { + RegsForValue &OutRegs = IndirectStoresToEmit[i].first; + Value *Ptr = IndirectStoresToEmit[i].second; + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + Chain, &Flag); + StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); + + } + + // Emit the non-flagged stores from the physregs. + SmallVector<SDValue, 8> OutChains; + for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) + OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(), + StoresToEmit[i].first, + getValue(StoresToEmit[i].second), + StoresToEmit[i].second, 0)); + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &OutChains[0], OutChains.size()); + DAG.setRoot(Chain); +} + + +void SelectionDAGLowering::visitMalloc(MallocInst &I) { + SDValue Src = getValue(I.getOperand(0)); + + // Scale up by the type size in the original i32 type width. Various + // mid-level optimizers may make assumptions about demanded bits etc from the + // i32-ness of the optimizer: we do not want to promote to i64 and then + // multiply on 64-bit targets. + // FIXME: Malloc inst should go away: PR715. + uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType()); + if (ElementSize != 1) + Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(), + Src, DAG.getConstant(ElementSize, Src.getValueType())); + + MVT IntPtr = TLI.getPointerTy(); + + if (IntPtr.bitsLT(Src.getValueType())) + Src = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), IntPtr, Src); + else if (IntPtr.bitsGT(Src.getValueType())) + Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Src; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Args.push_back(Entry); + + std::pair<SDValue,SDValue> Result = + TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, + CallingConv::C, PerformTailCallOpt, + DAG.getExternalSymbol("malloc", IntPtr), + Args, DAG, getCurDebugLoc()); + setValue(&I, Result.first); // Pointers always fit in registers + DAG.setRoot(Result.second); +} + +void SelectionDAGLowering::visitFree(FreeInst &I) { + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = getValue(I.getOperand(0)); + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Args.push_back(Entry); + MVT IntPtr = TLI.getPointerTy(); + std::pair<SDValue,SDValue> Result = + TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false, + CallingConv::C, PerformTailCallOpt, + DAG.getExternalSymbol("free", IntPtr), Args, DAG, + getCurDebugLoc()); + DAG.setRoot(Result.second); +} + +void SelectionDAGLowering::visitVAStart(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(1)))); +} + +void SelectionDAGLowering::visitVAArg(VAArgInst &I) { + SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), + getRoot(), getValue(I.getOperand(0)), + DAG.getSrcValue(I.getOperand(0))); + setValue(&I, V); + DAG.setRoot(V.getValue(1)); +} + +void SelectionDAGLowering::visitVAEnd(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), + MVT::Other, 
getRoot(), + getValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(1)))); +} + +void SelectionDAGLowering::visitVACopy(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + DAG.getSrcValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(2)))); +} + +/// TargetLowering::LowerArguments - This is the default LowerArguments +/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all +/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be +/// integrated into SDISel. +void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &ArgValues, + DebugLoc dl) { + // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node. + SmallVector<SDValue, 3+16> Ops; + Ops.push_back(DAG.getRoot()); + Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy())); + Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy())); + + // Add one result value for each formal argument. + SmallVector<MVT, 16> RetVals; + unsigned j = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I, ++j) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(*this, I->getType(), ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + MVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForMVT(); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + getTargetData()->getABITypeAlignment(ArgTy); + + if (F.paramHasAttr(j, Attribute::ZExt)) + Flags.setZExt(); + if (F.paramHasAttr(j, Attribute::SExt)) + Flags.setSExt(); + if (F.paramHasAttr(j, Attribute::InReg)) + Flags.setInReg(); + if (F.paramHasAttr(j, Attribute::StructRet)) + Flags.setSRet(); + if (F.paramHasAttr(j, Attribute::ByVal)) { + Flags.setByVal(); + const PointerType *Ty = cast<PointerType>(I->getType()); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = getByValTypeAlignment(ElementTy); + unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. + if (F.getParamAlignment(j)) + FrameAlign = F.getParamAlignment(j); + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (F.paramHasAttr(j, Attribute::Nest)) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + MVT RegisterVT = getRegisterType(VT); + unsigned NumRegs = getNumRegisters(VT); + for (unsigned i = 0; i != NumRegs; ++i) { + RetVals.push_back(RegisterVT); + ISD::ArgFlagsTy MyFlags = Flags; + if (NumRegs > 1 && i == 0) + MyFlags.setSplit(); + // if it isn't first piece, alignment must be 1 + else if (i > 0) + MyFlags.setOrigAlign(1); + Ops.push_back(DAG.getArgFlags(MyFlags)); + } + } + } + + RetVals.push_back(MVT::Other); + + // Create the node. + SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, dl, + DAG.getVTList(&RetVals[0], RetVals.size()), + &Ops[0], Ops.size()).getNode(); + + // Prelower FORMAL_ARGUMENTS. This isn't required for functionality, but + // allows exposing the loads that may be part of the argument access to the + // first DAGCombiner pass. + SDValue TmpRes = LowerOperation(SDValue(Result, 0), DAG); + + // The number of results should match up, except that the lowered one may have + // an extra flag result. 
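// When a formal argument needs several registers, the loop above marks the
// first part as the start of a split value and forces the original alignment
// of every later part to 1, so only the first piece carries the real ABI
// alignment. A standalone sketch of that per-part rule (PartFlags is an
// illustrative stand-in for ISD::ArgFlagsTy):

#include <vector>
#include <cstdio>

struct PartFlags { bool Split; unsigned OrigAlign; };

static std::vector<PartFlags> flagsForParts(unsigned NumRegs, unsigned Align) {
  std::vector<PartFlags> Parts(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    Parts[i].Split = (NumRegs > 1 && i == 0);   // first piece of a split value
    Parts[i].OrigAlign = (i == 0) ? Align : 1;  // later pieces: alignment 1
  }
  return Parts;
}

int main() {
  // e.g. an i64 argument lowered to two i32 registers with 8-byte alignment.
  std::vector<PartFlags> P = flagsForParts(2, 8);
  for (unsigned i = 0; i != P.size(); ++i)
    std::printf("part %u: split=%d origAlign=%u\n", i, P[i].Split, P[i].OrigAlign);
  return 0;
}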
+ assert((Result->getNumValues() == TmpRes.getNode()->getNumValues() || + (Result->getNumValues()+1 == TmpRes.getNode()->getNumValues() && + TmpRes.getValue(Result->getNumValues()).getValueType() == MVT::Flag)) + && "Lowering produced unexpected number of results!"); + + // The FORMAL_ARGUMENTS node itself is likely no longer needed. + if (Result != TmpRes.getNode() && Result->use_empty()) { + HandleSDNode Dummy(DAG.getRoot()); + DAG.RemoveDeadNode(Result); + } + + Result = TmpRes.getNode(); + + unsigned NumArgRegs = Result->getNumValues() - 1; + DAG.setRoot(SDValue(Result, NumArgRegs)); + + // Set up the return result vector. + unsigned i = 0; + unsigned Idx = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + ++I, ++Idx) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(*this, I->getType(), ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + MVT VT = ValueVTs[Value]; + MVT PartVT = getRegisterType(VT); + + unsigned NumParts = getNumRegisters(VT); + SmallVector<SDValue, 4> Parts(NumParts); + for (unsigned j = 0; j != NumParts; ++j) + Parts[j] = SDValue(Result, i++); + + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (F.paramHasAttr(Idx, Attribute::SExt)) + AssertOp = ISD::AssertSext; + else if (F.paramHasAttr(Idx, Attribute::ZExt)) + AssertOp = ISD::AssertZext; + + ArgValues.push_back(getCopyFromParts(DAG, dl, &Parts[0], NumParts, + PartVT, VT, AssertOp)); + } + } + assert(i == NumArgRegs && "Argument register count mismatch!"); +} + + +/// TargetLowering::LowerCallTo - This is the default LowerCallTo +/// implementation, which just inserts an ISD::CALL node, which is later custom +/// lowered by the target to something concrete. FIXME: When all targets are +/// migrated to using ISD::CALL, this hook should be integrated into SDISel. +std::pair<SDValue, SDValue> +TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, + bool RetSExt, bool RetZExt, bool isVarArg, + bool isInreg, + unsigned CallingConv, bool isTailCall, + SDValue Callee, + ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { + assert((!isTailCall || PerformTailCallOpt) && + "isTailCall set when tail-call optimizations are disabled!"); + + SmallVector<SDValue, 32> Ops; + Ops.push_back(Chain); // Op#0 - Chain + Ops.push_back(Callee); + + // Handle all of the outgoing arguments. + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(*this, Args[i].Ty, ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + MVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForMVT(); + SDValue Op = SDValue(Args[i].Node.getNode(), + Args[i].Node.getResNo() + Value); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + getTargetData()->getABITypeAlignment(ArgTy); + + if (Args[i].isZExt) + Flags.setZExt(); + if (Args[i].isSExt) + Flags.setSExt(); + if (Args[i].isInReg) + Flags.setInReg(); + if (Args[i].isSRet) + Flags.setSRet(); + if (Args[i].isByVal) { + Flags.setByVal(); + const PointerType *Ty = cast<PointerType>(Args[i].Ty); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = getByValTypeAlignment(ElementTy); + unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + // For ByVal, alignment should come from FE. BE will guess if this + // info is not there but there are cases it cannot get right. 
+ if (Args[i].Alignment) + FrameAlign = Args[i].Alignment; + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (Args[i].isNest) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + MVT PartVT = getRegisterType(VT); + unsigned NumParts = getNumRegisters(VT); + SmallVector<SDValue, 4> Parts(NumParts); + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (Args[i].isSExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (Args[i].isZExt) + ExtendKind = ISD::ZERO_EXTEND; + + getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind); + + for (unsigned i = 0; i != NumParts; ++i) { + // if it isn't first piece, alignment must be 1 + ISD::ArgFlagsTy MyFlags = Flags; + if (NumParts > 1 && i == 0) + MyFlags.setSplit(); + else if (i != 0) + MyFlags.setOrigAlign(1); + + Ops.push_back(Parts[i]); + Ops.push_back(DAG.getArgFlags(MyFlags)); + } + } + } + + // Figure out the result value types. We start by making a list of + // the potentially illegal return value types. + SmallVector<MVT, 4> LoweredRetTys; + SmallVector<MVT, 4> RetTys; + ComputeValueVTs(*this, RetTy, RetTys); + + // Then we translate that to a list of legal types. + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + MVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(VT); + unsigned NumRegs = getNumRegisters(VT); + for (unsigned i = 0; i != NumRegs; ++i) + LoweredRetTys.push_back(RegisterVT); + } + + LoweredRetTys.push_back(MVT::Other); // Always has a chain. + + // Create the CALL node. + SDValue Res = DAG.getCall(CallingConv, dl, + isVarArg, isTailCall, isInreg, + DAG.getVTList(&LoweredRetTys[0], + LoweredRetTys.size()), + &Ops[0], Ops.size() + ); + Chain = Res.getValue(LoweredRetTys.size() - 1); + + // Gather up the call result into a single value. 
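// The code below walks the CALL node's result values and reassembles each
// (possibly illegal) return type from its legal register parts via
// getCopyFromParts. A standalone sketch of the simplest case: an i64 return
// rebuilt from two i32 parts on a 32-bit target (low-half-first part order is
// shown purely for illustration):

#include <cstdint>
#include <cstdio>

static uint64_t assembleI64(uint32_t Lo, uint32_t Hi) {
  return (uint64_t(Hi) << 32) | Lo;
}

int main() {
  uint32_t Parts[2] = { 0x89abcdefu, 0x01234567u };   // two copied-out results
  std::printf("reassembled: 0x%016llx\n",
              (unsigned long long)assembleI64(Parts[0], Parts[1]));
  return 0;
}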
+ if (RetTy != Type::VoidTy && !RetTys.empty()) { + ISD::NodeType AssertOp = ISD::DELETED_NODE; + + if (RetSExt) + AssertOp = ISD::AssertSext; + else if (RetZExt) + AssertOp = ISD::AssertZext; + + SmallVector<SDValue, 4> ReturnValues; + unsigned RegNo = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + MVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(VT); + unsigned NumRegs = getNumRegisters(VT); + unsigned RegNoEnd = NumRegs + RegNo; + SmallVector<SDValue, 4> Results; + for (; RegNo != RegNoEnd; ++RegNo) + Results.push_back(Res.getValue(RegNo)); + SDValue ReturnValue = + getCopyFromParts(DAG, dl, &Results[0], NumRegs, RegisterVT, VT, + AssertOp); + ReturnValues.push_back(ReturnValue); + } + Res = DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&RetTys[0], RetTys.size()), + &ReturnValues[0], ReturnValues.size()); + } + + return std::make_pair(Res, Chain); +} + +void TargetLowering::LowerOperationWrapper(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { + SDValue Res = LowerOperation(SDValue(N, 0), DAG); + if (Res.getNode()) + Results.push_back(Res); +} + +SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + assert(0 && "LowerOperation not implemented for this target!"); + abort(); + return SDValue(); +} + + +void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) { + SDValue Op = getValue(V); + assert((Op.getOpcode() != ISD::CopyFromReg || + cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && + "Copy from a reg to the same reg!"); + assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); + + RegsForValue RFV(TLI, Reg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); + PendingExports.push_back(Chain); +} + +#include "llvm/CodeGen/SelectionDAGISel.h" + +void SelectionDAGISel:: +LowerArguments(BasicBlock *LLVMBB) { + // If this is the entry block, emit arguments. + Function &F = *LLVMBB->getParent(); + SDValue OldRoot = SDL->DAG.getRoot(); + SmallVector<SDValue, 16> Args; + TLI.LowerArguments(F, SDL->DAG, Args, SDL->getCurDebugLoc()); + + unsigned a = 0; + for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); + AI != E; ++AI) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, AI->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (!AI->use_empty()) { + SDL->setValue(AI, SDL->DAG.getMergeValues(&Args[a], NumValues, + SDL->getCurDebugLoc())); + // If this argument is live outside of the entry block, insert a copy from + // whereever we got it to the vreg that other BB's will reference it as. + SDL->CopyToExportRegsIfNeeded(AI); + } + a += NumValues; + } + + // Finally, if the target has anything special to do, allow it to do so. + // FIXME: this should insert code into the DAG! + EmitFunctionEntryCode(F, SDL->DAG.getMachineFunction()); +} + +/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to +/// ensure constants are generated when needed. Remember the virtual registers +/// that need to be added to the Machine PHI nodes as input. We cannot just +/// directly add them, because expansion might result in multiple MBB's for one +/// BB. As such, the start of the BB might correspond to a different MBB than +/// the end. 
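// In the function below, each PHI input value contributes one entry per legal
// register it expands to: a machine PHI node paired with consecutive virtual
// registers Reg, Reg+1, ... A standalone sketch of that bookkeeping (PHIUpdate
// and the per-value register counts are illustrative):

#include <utility>
#include <vector>
#include <cstdio>

typedef std::pair<unsigned, unsigned> PHIUpdate;   // (machine PHI id, vreg)

static void recordPHIInputs(std::vector<PHIUpdate> &Updates,
                            unsigned FirstPHI, unsigned Reg,
                            const std::vector<unsigned> &RegsPerValue) {
  unsigned PHI = FirstPHI;
  for (unsigned v = 0; v != RegsPerValue.size(); ++v) {
    for (unsigned i = 0; i != RegsPerValue[v]; ++i)
      Updates.push_back(std::make_pair(PHI++, Reg + i));   // one machine PHI per register
    Reg += RegsPerValue[v];
  }
}

int main() {
  std::vector<PHIUpdate> Updates;
  std::vector<unsigned> RegsPerValue;
  RegsPerValue.push_back(2);            // e.g. an i64 value on a 32-bit target
  recordPHIInputs(Updates, /*FirstPHI=*/0, /*Reg=*/100, RegsPerValue);
  for (unsigned i = 0; i != Updates.size(); ++i)
    std::printf("phi %u <- vreg %u\n", Updates[i].first, Updates[i].second);
  return 0;
}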
+/// +void +SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { + TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB)) continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + PHINode *PN; + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. + for (BasicBlock::iterator I = SuccBB->begin(); + (PN = dyn_cast<PHINode>(I)); ++I) { + // Ignore dead phi's. + if (PN->use_empty()) continue; + + unsigned Reg; + Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + if (Constant *C = dyn_cast<Constant>(PHIOp)) { + unsigned &RegOut = SDL->ConstantsOut[C]; + if (RegOut == 0) { + RegOut = FuncInfo->CreateRegForValue(C); + SDL->CopyValueToVirtualRegister(C, RegOut); + } + Reg = RegOut; + } else { + Reg = FuncInfo->ValueMap[PHIOp]; + if (Reg == 0) { + assert(isa<AllocaInst>(PHIOp) && + FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && + "Didn't codegen value into a register!??"); + Reg = FuncInfo->CreateRegForValue(PHIOp); + SDL->CopyValueToVirtualRegister(PHIOp, Reg); + } + } + + // Remember that this register needs to added to the machine PHI node as + // the input for this MBB. + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + MVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(VT); + for (unsigned i = 0, e = NumRegisters; i != e; ++i) + SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + Reg += NumRegisters; + } + } + } + SDL->ConstantsOut.clear(); +} + +/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only +/// supports legal types, and it emits MachineInstrs directly instead of +/// creating SelectionDAG nodes. +/// +bool +SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, + FastISel *F) { + TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + unsigned OrigNumPHINodesToUpdate = SDL->PHINodesToUpdate.size(); + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB)) continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + PHINode *PN; + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. + for (BasicBlock::iterator I = SuccBB->begin(); + (PN = dyn_cast<PHINode>(I)); ++I) { + // Ignore dead phi's. 
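One detail of the DAG-based routine above that is easy to miss: constants feeding successor PHIs are funneled through the ConstantsOut map so that each constant is copied into a virtual register at most once per block, no matter how many PHI operands refer to it, and the map is cleared once the block is finished. A tiny sketch of that memoization pattern, with a string standing in for Constant and a counter standing in for the register allocator (all names here are invented for the sketch):

#include <cstdio>
#include <map>
#include <string>

static unsigned NextVReg = 1024;
static unsigned CreateRegForValue(const std::string &) { return NextVReg++; }

int main() {
  // Plays the role of SDL->ConstantsOut for a single basic block.
  std::map<std::string, unsigned> ConstantsOut;
  const char *PHIOperands[] = { "i32 7", "i32 7", "i32 42", "i32 7" };

  for (unsigned i = 0; i != 4; ++i) {
    unsigned &RegOut = ConstantsOut[PHIOperands[i]];   // 0 on first lookup
    if (RegOut == 0)                                   // first use in this block:
      RegOut = CreateRegForValue(PHIOperands[i]);      //   emit the copy once
    std::printf("%-7s -> vreg%u\n", PHIOperands[i], RegOut);
  }
  ConstantsOut.clear();                                // done with the block
  return 0;
}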
+ if (PN->use_empty()) continue; + + // Only handle legal types. Two interesting things to note here. First, + // by bailing out early, we may leave behind some dead instructions, + // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its + // own moves. Second, this check is necessary becuase FastISel doesn't + // use CreateRegForValue to create registers, so it always creates + // exactly one register for each non-void instruction. + MVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { + // Promote MVT::i1. + if (VT == MVT::i1) + VT = TLI.getTypeToTransformTo(VT); + else { + SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + } + + Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + unsigned Reg = F->getRegForValue(PHIOp); + if (Reg == 0) { + SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + } + } + + return true; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h new file mode 100644 index 000000000000..578aa591ce67 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -0,0 +1,558 @@ +//===-- SelectionDAGBuild.h - Selection-DAG building ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating from LLVM IR into SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#ifndef SELECTIONDAGBUILD_H +#define SELECTIONDAGBUILD_H + +#include "llvm/Constants.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#ifndef NDEBUG +#include "llvm/ADT/SmallSet.h" +#endif +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Target/TargetMachine.h" +#include <vector> +#include <set> + +namespace llvm { + +class AliasAnalysis; +class AllocaInst; +class BasicBlock; +class BitCastInst; +class BranchInst; +class CallInst; +class ExtractElementInst; +class ExtractValueInst; +class FCmpInst; +class FPExtInst; +class FPToSIInst; +class FPToUIInst; +class FPTruncInst; +class FreeInst; +class Function; +class GetElementPtrInst; +class GCFunctionInfo; +class ICmpInst; +class IntToPtrInst; +class InvokeInst; +class InsertElementInst; +class InsertValueInst; +class Instruction; +class LoadInst; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineModuleInfo; +class MachineRegisterInfo; +class MallocInst; +class PHINode; +class PtrToIntInst; +class ReturnInst; +class SDISelAsmOperandInfo; +class SExtInst; +class SelectInst; +class ShuffleVectorInst; +class SIToFPInst; +class StoreInst; +class SwitchInst; +class TargetData; +class TargetLowering; +class TruncInst; +class UIToFPInst; +class UnreachableInst; +class UnwindInst; +class VICmpInst; +class VFCmpInst; +class VAArgInst; +class ZExtInst; + +//===--------------------------------------------------------------------===// +/// FunctionLoweringInfo - This contains information that is global to a +/// function that is used when lowering a region of the function. 
+/// +class FunctionLoweringInfo { +public: + TargetLowering &TLI; + Function *Fn; + MachineFunction *MF; + MachineRegisterInfo *RegInfo; + + explicit FunctionLoweringInfo(TargetLowering &TLI); + + /// set - Initialize this FunctionLoweringInfo with the given Function + /// and its associated MachineFunction. + /// + void set(Function &Fn, MachineFunction &MF, SelectionDAG &DAG, + bool EnableFastISel); + + /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. + DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap; + + /// ValueMap - Since we emit code for the function a basic block at a time, + /// we must remember which virtual registers hold the values for + /// cross-basic-block values. + DenseMap<const Value*, unsigned> ValueMap; + + /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in + /// the entry block. This allows the allocas to be efficiently referenced + /// anywhere in the function. + DenseMap<const AllocaInst*, int> StaticAllocaMap; + +#ifndef NDEBUG + SmallSet<Instruction*, 8> CatchInfoLost; + SmallSet<Instruction*, 8> CatchInfoFound; +#endif + + unsigned MakeReg(MVT VT); + + /// isExportedInst - Return true if the specified value is an instruction + /// exported from its block. + bool isExportedInst(const Value *V) { + return ValueMap.count(V); + } + + unsigned CreateRegForValue(const Value *V); + + unsigned InitializeRegForValue(const Value *V) { + unsigned &R = ValueMap[V]; + assert(R == 0 && "Already initialized this value register!"); + return R = CreateRegForValue(V); + } + + struct LiveOutInfo { + unsigned NumSignBits; + APInt KnownOne, KnownZero; + LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {} + }; + + /// LiveOutRegInfo - Information about live out vregs, indexed by their + /// register number offset by 'FirstVirtualRegister'. + std::vector<LiveOutInfo> LiveOutRegInfo; + + /// clear - Clear out all the function-specific state. This returns this + /// FunctionLoweringInfo to an empty state, ready to be used for a + /// different function. + void clear() { + MBBMap.clear(); + ValueMap.clear(); + StaticAllocaMap.clear(); +#ifndef NDEBUG + CatchInfoLost.clear(); + CatchInfoFound.clear(); +#endif + LiveOutRegInfo.clear(); + } +}; + +//===----------------------------------------------------------------------===// +/// SelectionDAGLowering - This is the common target-independent lowering +/// implementation that is parameterized by a TargetLowering object. +/// Also, targets can overload any lowering method. +/// +class SelectionDAGLowering { + MachineBasicBlock *CurMBB; + + /// CurDebugLoc - current file + line number. Changes as we build the DAG. + DebugLoc CurDebugLoc; + + DenseMap<const Value*, SDValue> NodeMap; + + /// PendingLoads - Loads are not emitted to the program immediately. We bunch + /// them up and then emit token factor nodes when possible. This allows us to + /// get simple disambiguation between loads without worrying about alias + /// analysis. + SmallVector<SDValue, 8> PendingLoads; + + /// PendingExports - CopyToReg nodes that copy values to virtual registers + /// for export to other blocks need to be emitted before any terminator + /// instruction, but they have no other ordering requirements. We bunch them + /// up and the emit a single tokenfactor for them just before terminator + /// instructions. + SmallVector<SDValue, 8> PendingExports; + + /// Case - A struct to record the Value for a switch case, and the + /// case's target basic block. 
+ struct Case { + Constant* Low; + Constant* High; + MachineBasicBlock* BB; + + Case() : Low(0), High(0), BB(0) { } + Case(Constant* low, Constant* high, MachineBasicBlock* bb) : + Low(low), High(high), BB(bb) { } + uint64_t size() const { + uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue(); + uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue(); + return (rHigh - rLow + 1ULL); + } + }; + + struct CaseBits { + uint64_t Mask; + MachineBasicBlock* BB; + unsigned Bits; + + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits): + Mask(mask), BB(bb), Bits(bits) { } + }; + + typedef std::vector<Case> CaseVector; + typedef std::vector<CaseBits> CaseBitsVector; + typedef CaseVector::iterator CaseItr; + typedef std::pair<CaseItr, CaseItr> CaseRange; + + /// CaseRec - A struct with ctor used in lowering switches to a binary tree + /// of conditional branches. + struct CaseRec { + CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) : + CaseBB(bb), LT(lt), GE(ge), Range(r) {} + + /// CaseBB - The MBB in which to emit the compare and branch + MachineBasicBlock *CaseBB; + /// LT, GE - If nonzero, we know the current case value must be less-than or + /// greater-than-or-equal-to these Constants. + Constant *LT; + Constant *GE; + /// Range - A pair of iterators representing the range of case values to be + /// processed at this point in the binary search tree. + CaseRange Range; + }; + + typedef std::vector<CaseRec> CaseRecVector; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator () (const Case& C1, const Case& C2) { + assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + + struct CaseBitsCmp { + bool operator () (const CaseBits& C1, const CaseBits& C2) { + return C1.Bits > C2.Bits; + } + }; + + size_t Clusterify(CaseVector& Cases, const SwitchInst &SI); + + /// CaseBlock - This structure is used to communicate between SDLowering and + /// SDISel for the code generation of additional basic blocks needed by multi- + /// case switch statements. + struct CaseBlock { + CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle, + MachineBasicBlock *truebb, MachineBasicBlock *falsebb, + MachineBasicBlock *me) + : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), + TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {} + // CC - the condition code to use for the case block's setcc node + ISD::CondCode CC; + // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit. + // Emit by default LHS op RHS. MHS is used for range comparisons: + // If MHS is not null: (LHS <= MHS) and (MHS <= RHS). + Value *CmpLHS, *CmpMHS, *CmpRHS; + // TrueBB/FalseBB - the block to branch to if the setcc is true/false. + MachineBasicBlock *TrueBB, *FalseBB; + // ThisBB - the block into which to emit the code for the setcc and branches + MachineBasicBlock *ThisBB; + }; + struct JumpTable { + JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, + MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} + + /// Reg - the virtual register containing the index of the jump table entry + //. to jump to. + unsigned Reg; + /// JTI - the JumpTableIndex for this jump table in the function. + unsigned JTI; + /// MBB - the MBB into which to emit the code for the indirect jump. 
+ MachineBasicBlock *MBB; + /// Default - the MBB of the default bb, which is a successor of the range + /// check MBB. This is when updating PHI nodes in successors. + MachineBasicBlock *Default; + }; + struct JumpTableHeader { + JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H, + bool E = false): + First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {} + APInt First; + APInt Last; + Value *SValue; + MachineBasicBlock *HeaderBB; + bool Emitted; + }; + typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock; + + struct BitTestCase { + BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr): + Mask(M), ThisBB(T), TargetBB(Tr) { } + uint64_t Mask; + MachineBasicBlock* ThisBB; + MachineBasicBlock* TargetBB; + }; + + typedef SmallVector<BitTestCase, 3> BitTestInfo; + + struct BitTestBlock { + BitTestBlock(APInt F, APInt R, Value* SV, + unsigned Rg, bool E, + MachineBasicBlock* P, MachineBasicBlock* D, + const BitTestInfo& C): + First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E), + Parent(P), Default(D), Cases(C) { } + APInt First; + APInt Range; + Value *SValue; + unsigned Reg; + bool Emitted; + MachineBasicBlock *Parent; + MachineBasicBlock *Default; + BitTestInfo Cases; + }; + +public: + // TLI - This is information that describes the available target features we + // need for lowering. This indicates when operations are unavailable, + // implemented with a libcall, etc. + TargetLowering &TLI; + SelectionDAG &DAG; + const TargetData *TD; + AliasAnalysis *AA; + + /// SwitchCases - Vector of CaseBlock structures used to communicate + /// SwitchInst code generation information. + std::vector<CaseBlock> SwitchCases; + /// JTCases - Vector of JumpTable structures used to communicate + /// SwitchInst code generation information. + std::vector<JumpTableBlock> JTCases; + /// BitTestCases - Vector of BitTestBlock structures used to communicate + /// SwitchInst code generation information. + std::vector<BitTestBlock> BitTestCases; + + std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; + + // Emit PHI-node-operand constants only once even if used by multiple + // PHI nodes. + DenseMap<Constant*, unsigned> ConstantsOut; + + /// FuncInfo - Information about the function as a whole. + /// + FunctionLoweringInfo &FuncInfo; + + /// OptLevel - What optimization level we're generating code for. + /// + CodeGenOpt::Level OptLevel; + + /// GFI - Garbage collection metadata for the function. + GCFunctionInfo *GFI; + + SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli, + FunctionLoweringInfo &funcinfo, + CodeGenOpt::Level ol) + : CurDebugLoc(DebugLoc::getUnknownLoc()), + TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol) { + } + + void init(GCFunctionInfo *gfi, AliasAnalysis &aa); + + /// clear - Clear out the curret SelectionDAG and the associated + /// state and prepare this SelectionDAGLowering object to be used + /// for a new block. This doesn't clear out information about + /// additional blocks that are needed to complete switch lowering + /// or PHI node updating; that information is cleared out as it is + /// consumed. + void clear(); + + /// getRoot - Return the current virtual root of the Selection DAG, + /// flushing any PendingLoad items. This must be done before emitting + /// a store or any other node that may need to be ordered after any + /// prior load instructions. + /// + SDValue getRoot(); + + /// getControlRoot - Similar to getRoot, but instead of flushing all the + /// PendingLoad items, flush all the PendingExports items. 
It is necessary + /// to do this before emitting a terminator instruction. + /// + SDValue getControlRoot(); + + DebugLoc getCurDebugLoc() const { return CurDebugLoc; } + void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; } + + void CopyValueToVirtualRegister(Value *V, unsigned Reg); + + void visit(Instruction &I); + + void visit(unsigned Opcode, User &I); + + void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; } + + SDValue getValue(const Value *V); + + void setValue(const Value *V, SDValue NewN) { + SDValue &N = NodeMap[V]; + assert(N.getNode() == 0 && "Already set a value for this node!"); + N = NewN; + } + + void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs); + + void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + unsigned Opc); + void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB); + bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); + bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB); + void CopyToExportRegsIfNeeded(Value *V); + void ExportFromCurrentBlock(Value *V); + void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall, + MachineBasicBlock *LandingPad = NULL); + +private: + // Terminator instructions. + void visitRet(ReturnInst &I); + void visitBr(BranchInst &I); + void visitSwitch(SwitchInst &I); + void visitUnreachable(UnreachableInst &I) { /* noop */ } + + // Helpers for visitSwitch + bool handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleBitTestsSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); +public: + void visitSwitchCase(CaseBlock &CB); + void visitBitTestHeader(BitTestBlock &B); + void visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B); + void visitJumpTable(JumpTable &JT); + void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH); + +private: + // These all get lowered before this pass. 
+ void visitInvoke(InvokeInst &I); + void visitUnwind(UnwindInst &I); + + void visitBinary(User &I, unsigned OpCode); + void visitShift(User &I, unsigned Opcode); + void visitAdd(User &I); + void visitSub(User &I); + void visitMul(User &I); + void visitURem(User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); } + void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (User &I) { visitBinary(I, ISD::AND); } + void visitOr (User &I) { visitBinary(I, ISD::OR); } + void visitXor (User &I) { visitBinary(I, ISD::XOR); } + void visitShl (User &I) { visitShift(I, ISD::SHL); } + void visitLShr(User &I) { visitShift(I, ISD::SRL); } + void visitAShr(User &I) { visitShift(I, ISD::SRA); } + void visitICmp(User &I); + void visitFCmp(User &I); + void visitVICmp(User &I); + void visitVFCmp(User &I); + // Visit the conversion instructions + void visitTrunc(User &I); + void visitZExt(User &I); + void visitSExt(User &I); + void visitFPTrunc(User &I); + void visitFPExt(User &I); + void visitFPToUI(User &I); + void visitFPToSI(User &I); + void visitUIToFP(User &I); + void visitSIToFP(User &I); + void visitPtrToInt(User &I); + void visitIntToPtr(User &I); + void visitBitCast(User &I); + + void visitExtractElement(User &I); + void visitInsertElement(User &I); + void visitShuffleVector(User &I); + + void visitExtractValue(ExtractValueInst &I); + void visitInsertValue(InsertValueInst &I); + + void visitGetElementPtr(User &I); + void visitSelect(User &I); + + void visitMalloc(MallocInst &I); + void visitFree(FreeInst &I); + void visitAlloca(AllocaInst &I); + void visitLoad(LoadInst &I); + void visitStore(StoreInst &I); + void visitPHI(PHINode &I) { } // PHI nodes are handled specially. + void visitCall(CallInst &I); + void visitInlineAsm(CallSite CS); + const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic); + + void visitPow(CallInst &I); + void visitExp2(CallInst &I); + void visitExp(CallInst &I); + void visitLog(CallInst &I); + void visitLog2(CallInst &I); + void visitLog10(CallInst &I); + + void visitVAStart(CallInst &I); + void visitVAArg(VAArgInst &I); + void visitVAEnd(CallInst &I); + void visitVACopy(CallInst &I); + + void visitUserOp1(Instruction &I) { + assert(0 && "UserOp1 should not exist at instruction selection time!"); + abort(); + } + void visitUserOp2(Instruction &I) { + assert(0 && "UserOp2 should not exist at instruction selection time!"); + abort(); + } + + const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); + const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op); +}; + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB); + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp new file mode 100644 index 000000000000..9d72a128d18b --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -0,0 +1,1347 @@ +//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAGISel class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "ScheduleDAGSDNodes.h" +#include "SelectionDAGBuild.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Constants.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Timer.h" +#include <algorithm> +using namespace llvm; + +static cl::opt<bool> +DisableLegalizeTypes("disable-legalize-types", cl::Hidden); +static cl::opt<bool> +EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, + cl::desc("Enable verbose messages in the \"fast\" " + "instruction selector")); +static cl::opt<bool> +EnableFastISelAbort("fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction fails")); +static cl::opt<bool> +SchedLiveInCopies("schedule-livein-copies", + cl::desc("Schedule copies of livein registers"), + cl::init(false)); + +#ifndef NDEBUG +static cl::opt<bool> +ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the first " + "dag combine pass")); +static cl::opt<bool> +ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize types")); +static cl::opt<bool> +ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); +static cl::opt<bool> +ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the second " + "dag combine pass")); +static cl::opt<bool> +ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post legalize types" + " dag combine pass")); +static cl::opt<bool> +ViewISelDAGs("view-isel-dags", cl::Hidden, + cl::desc("Pop up a window to show isel dags as they are selected")); +static cl::opt<bool> +ViewSchedDAGs("view-sched-dags", cl::Hidden, + cl::desc("Pop up a window to show sched dags as they are processed")); +static cl::opt<bool> +ViewSUnitDAGs("view-sunit-dags", cl::Hidden, + cl::desc("Pop up a window to show SUnit dags after they are processed")); +#else +static const bool ViewDAGCombine1 = false, + 
ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, + ViewDAGCombineLT = false, + ViewISelDAGs = false, ViewSchedDAGs = false, + ViewSUnitDAGs = false; +#endif + +//===---------------------------------------------------------------------===// +/// +/// RegisterScheduler class - Track the registration of instruction schedulers. +/// +//===---------------------------------------------------------------------===// +MachinePassRegistry RegisterScheduler::Registry; + +//===---------------------------------------------------------------------===// +/// +/// ISHeuristic command line option for instruction schedulers. +/// +//===---------------------------------------------------------------------===// +static cl::opt<RegisterScheduler::FunctionPassCtor, false, + RegisterPassParser<RegisterScheduler> > +ISHeuristic("pre-RA-sched", + cl::init(&createDefaultScheduler), + cl::desc("Instruction schedulers available (before register" + " allocation):")); + +static RegisterScheduler +defaultListDAGScheduler("default", "Best scheduler for the target", + createDefaultScheduler); + +namespace llvm { + //===--------------------------------------------------------------------===// + /// createDefaultScheduler - This creates an instruction scheduler appropriate + /// for the target. + ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetLowering &TLI = IS->getTargetLowering(); + + if (OptLevel == CodeGenOpt::None) + return createFastDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency) + return createTDListDAGScheduler(IS, OptLevel); + assert(TLI.getSchedulingPreference() == + TargetLowering::SchedulingForRegPressure && "Unknown sched type!"); + return createBURRListDAGScheduler(IS, OptLevel); + } +} + +// EmitInstrWithCustomInserter - This method should be implemented by targets +// that mark instructions with the 'usesCustomDAGSchedInserter' flag. These +// instructions are special in various ways, which require special support to +// insert. The specified MachineInstr is created but not inserted into any +// basic blocks, and the scheduler passes ownership of it to this method. +MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { + cerr << "If a target marks an instruction with " + << "'usesCustomDAGSchedInserter', it must implement " + << "TargetLowering::EmitInstrWithCustomInserter!\n"; + abort(); + return 0; +} + +/// EmitLiveInCopy - Emit a copy for a live in physical register. If the +/// physical register has only a single copy use, then coalesced the copy +/// if possible. +static void EmitLiveInCopy(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &InsertPos, + unsigned VirtReg, unsigned PhysReg, + const TargetRegisterClass *RC, + DenseMap<MachineInstr*, unsigned> &CopyRegMap, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + unsigned NumUses = 0; + MachineInstr *UseMI = NULL; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), + UE = MRI.use_end(); UI != UE; ++UI) { + UseMI = &*UI; + if (++NumUses > 1) + break; + } + + // If the number of uses is not one, or the use is not a move instruction, + // don't coalesce. Also, only coalesce away a virtual register to virtual + // register copy. 
+ bool Coalesced = false; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (NumUses == 1 && + TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + VirtReg = DstReg; + Coalesced = true; + } + + // Now find an ideal location to insert the copy. + MachineBasicBlock::iterator Pos = InsertPos; + while (Pos != MBB->begin()) { + MachineInstr *PrevMI = prior(Pos); + DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI); + // copyRegToReg might emit multiple instructions to do a copy. + unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; + if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) + // This is what the BB looks like right now: + // r1024 = mov r0 + // ... + // r1 = mov r1024 + // + // We want to insert "r1025 = mov r1". Inserting this copy below the + // move to r1024 makes it impossible for that move to be coalesced. + // + // r1025 = mov r1 + // r1024 = mov r0 + // ... + // r1 = mov 1024 + // r2 = mov 1025 + break; // Woot! Found a good location. + --Pos; + } + + TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + if (Coalesced) { + if (&*InsertPos == UseMI) ++InsertPos; + MBB->erase(UseMI); + } +} + +/// EmitLiveInCopies - If this is the first basic block in the function, +/// and if it has live ins that need to be copied into vregs, emit the +/// copies into the block. +static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + if (SchedLiveInCopies) { + // Emit the copies at a heuristically-determined location in the block. + DenseMap<MachineInstr*, unsigned> CopyRegMap; + MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + E = MRI.livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = MRI.getRegClass(LI->second); + EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, + RC, CopyRegMap, MRI, TRI, TII); + } + } else { + // Emit the copies into the top of the block. + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + E = MRI.livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = MRI.getRegClass(LI->second); + TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), + LI->second, LI->first, RC, RC); + } + } +} + +//===----------------------------------------------------------------------===// +// SelectionDAGISel code +//===----------------------------------------------------------------------===// + +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : + FunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), + FuncInfo(new FunctionLoweringInfo(TLI)), + CurDAG(new SelectionDAG(TLI, *FuncInfo)), + SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)), + GFI(), + OptLevel(OL), + DAGSize(0) +{} + +SelectionDAGISel::~SelectionDAGISel() { + delete SDL; + delete CurDAG; + delete FuncInfo; +} + +unsigned SelectionDAGISel::MakeReg(MVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addRequired<GCModuleInfo>(); + AU.addRequired<DwarfWriter>(); + AU.setPreservesAll(); +} + +bool SelectionDAGISel::runOnFunction(Function &Fn) { + // Do some sanity-checking on the command-line options. 
+ assert((!EnableFastISelVerbose || EnableFastISel) && + "-fast-isel-verbose requires -fast-isel"); + assert((!EnableFastISelAbort || EnableFastISel) && + "-fast-isel-abort requires -fast-isel"); + + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (Fn.hasAvailableExternallyLinkage()) + return false; + + + // Get alias analysis for load/store combining. + AA = &getAnalysis<AliasAnalysis>(); + + TargetMachine &TM = TLI.getTargetMachine(); + MF = &MachineFunction::construct(&Fn, TM); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + + if (MF->getFunction()->hasGC()) + GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF->getFunction()); + else + GFI = 0; + RegInfo = &MF->getRegInfo(); + DOUT << "\n\n\n=== " << Fn.getName() << "\n"; + + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); + CurDAG->init(*MF, MMI, DW); + FuncInfo->set(Fn, *MF, *CurDAG, EnableFastISel); + SDL->init(GFI, *AA); + + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator())) + // Mark landing pad. + FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); + + SelectAllBasicBlocks(Fn, *MF, MMI, DW, TII); + + // If the first basic block in the function has live ins that need to be + // copied into vregs, emit the copies into the top of the block before + // emitting the code for the block. + EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII); + + // Add function live-ins to entry block live-in set. + for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(), + E = RegInfo->livein_end(); I != E; ++I) + MF->begin()->addLiveIn(I->first); + +#ifndef NDEBUG + assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() && + "Not all catch info was assigned to a landing pad!"); +#endif + + FuncInfo->clear(); + + return true; +} + +static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, + MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { + for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I) + if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { + // Apply the catch info to DestBB. + AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); +#ifndef NDEBUG + if (!FLI.MBBMap[SrcBB]->isLandingPad()) + FLI.CatchInfoFound.insert(EHSel); +#endif + } +} + +/// IsFixedFrameObjectWithPosOffset - Check if object is a fixed frame object and +/// whether object offset >= 0. +static bool +IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDValue Op) { + if (!isa<FrameIndexSDNode>(Op)) return false; + + FrameIndexSDNode * FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op); + int FrameIdx = FrameIdxNode->getIndex(); + return MFI->isFixedObjectIndex(FrameIdx) && + MFI->getObjectOffset(FrameIdx) >= 0; +} + +/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could +/// possibly be overwritten when lowering the outgoing arguments in a tail +/// call. Currently the implementation of this call is very conservative and +/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with +/// virtual registers would be overwritten by direct lowering. 
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDValue Op, + MachineFrameInfo *MFI) { + RegisterSDNode * OpReg = NULL; + if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS || + (Op.getOpcode()== ISD::CopyFromReg && + (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) && + (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) || + (Op.getOpcode() == ISD::LOAD && + IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) || + (Op.getOpcode() == ISD::MERGE_VALUES && + Op.getOperand(Op.getResNo()).getOpcode() == ISD::LOAD && + IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.getResNo()). + getOperand(1)))) + return true; + return false; +} + +/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the +/// DAG and fixes their tailcall attribute operand. +static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG, + const TargetLowering& TLI) { + SDNode * Ret = NULL; + SDValue Terminator = DAG.getRoot(); + + // Find RET node. + if (Terminator.getOpcode() == ISD::RET) { + Ret = Terminator.getNode(); + } + + // Fix tail call attribute of CALL nodes. + for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(), + BI = DAG.allnodes_end(); BI != BE; ) { + --BI; + if (CallSDNode *TheCall = dyn_cast<CallSDNode>(BI)) { + SDValue OpRet(Ret, 0); + SDValue OpCall(BI, 0); + bool isMarkedTailCall = TheCall->isTailCall(); + // If CALL node has tail call attribute set to true and the call is not + // eligible (no RET or the target rejects) the attribute is fixed to + // false. The TargetLowering::IsEligibleForTailCallOptimization function + // must correctly identify tail call optimizable calls. + if (!isMarkedTailCall) continue; + if (Ret==NULL || + !TLI.IsEligibleForTailCallOptimization(TheCall, OpRet, DAG)) { + // Not eligible. Mark CALL node as non tail call. Note that we + // can modify the call node in place since calls are not CSE'd. + TheCall->setNotTailCall(); + } else { + // Look for tail call clobbered arguments. Emit a series of + // copyto/copyfrom virtual register nodes to protect them. + SmallVector<SDValue, 32> Ops; + SDValue Chain = TheCall->getChain(), InFlag; + Ops.push_back(Chain); + Ops.push_back(TheCall->getCallee()); + for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) { + SDValue Arg = TheCall->getArg(i); + bool isByVal = TheCall->getArgFlags(i).isByVal(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (!isByVal && + IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) { + MVT VT = Arg.getValueType(); + unsigned VReg = MF.getRegInfo(). + createVirtualRegister(TLI.getRegClassFor(VT)); + Chain = DAG.getCopyToReg(Chain, Arg.getDebugLoc(), + VReg, Arg, InFlag); + InFlag = Chain.getValue(1); + Arg = DAG.getCopyFromReg(Chain, Arg.getDebugLoc(), + VReg, VT, InFlag); + Chain = Arg.getValue(1); + InFlag = Arg.getValue(2); + } + Ops.push_back(Arg); + Ops.push_back(TheCall->getArgFlagsVal(i)); + } + // Link in chain of CopyTo/CopyFromReg. + Ops[0] = Chain; + DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size()); + } + } + } +} + +void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, + BasicBlock::iterator Begin, + BasicBlock::iterator End) { + SDL->setCurrentBasicBlock(BB); + + // Lower all of the non-terminator instructions. + for (BasicBlock::iterator I = Begin; I != End; ++I) + if (!isa<TerminatorInst>(I)) + SDL->visit(*I); + + // Ensure that all instructions which are used outside of their defining + // blocks are available as virtual registers. Invoke is handled elsewhere. 
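The export step referred to in the comment above exists because SelectionDAG construction is per-block: any value with a user in another basic block must be copied into a virtual register before this block's terminator is emitted, so the other block can reach it through the ValueMap. A rough sketch of the underlying test, using a toy instruction type rather than the real IR classes (Inst, Block, and Users are invented here):

#include <cstdio>
#include <vector>

// Toy stand-ins for the IR: an instruction knows which block defines it and
// which instructions use it.
struct Inst {
  int Block;
  std::vector<Inst*> Users;
};

// Needs an export (a copy into a vreg) if any user lives in another block.
static bool usedOutsideDefiningBlock(const Inst &I) {
  for (unsigned i = 0; i != I.Users.size(); ++i)
    if (I.Users[i]->Block != I.Block)
      return true;
  return false;
}

int main() {
  Inst Use1, Use2, Def;
  Use1.Block = 0;                      // user in the defining block
  Use2.Block = 1;                      // user in a different block
  Def.Block  = 0;
  Def.Users.push_back(&Use1);
  Def.Users.push_back(&Use2);
  std::printf("needs export: %s\n",
              usedOutsideDefiningBlock(Def) ? "yes" : "no");
  return 0;
}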
+ for (BasicBlock::iterator I = Begin; I != End; ++I) + if (!isa<PHINode>(I) && !isa<InvokeInst>(I)) + SDL->CopyToExportRegsIfNeeded(I); + + // Handle PHI nodes in successor blocks. + if (End == LLVMBB->end()) { + HandlePHINodesInSuccessorBlocks(LLVMBB); + + // Lower the terminator after the copies are emitted. + SDL->visit(*LLVMBB->getTerminator()); + } + + // Make sure the root of the DAG is up-to-date. + CurDAG->setRoot(SDL->getControlRoot()); + + // Check whether calls in this block are real tail calls. Fix up CALL nodes + // with correct tailcall attribute so that the target can rely on the tailcall + // attribute indicating whether the call is really eligible for tail call + // optimization. + if (PerformTailCallOpt) + CheckDAGForTailCallsAndFixThem(*CurDAG, TLI); + + // Final step, emit the lowered DAG as machine code. + CodeGenAndEmitDAG(); + SDL->clear(); +} + +void SelectionDAGISel::ComputeLiveOutVRegInfo() { + SmallPtrSet<SDNode*, 128> VisitedNodes; + SmallVector<SDNode*, 128> Worklist; + + Worklist.push_back(CurDAG->getRoot().getNode()); + + APInt Mask; + APInt KnownZero; + APInt KnownOne; + + while (!Worklist.empty()) { + SDNode *N = Worklist.back(); + Worklist.pop_back(); + + // If we've already seen this node, ignore it. + if (!VisitedNodes.insert(N)) + continue; + + // Otherwise, add all chain operands to the worklist. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + Worklist.push_back(N->getOperand(i).getNode()); + + // If this is a CopyToReg with a vreg dest, process it. + if (N->getOpcode() != ISD::CopyToReg) + continue; + + unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // Ignore non-scalar or non-integer values. + SDValue Src = N->getOperand(2); + MVT SrcVT = Src.getValueType(); + if (!SrcVT.isInteger() || SrcVT.isVector()) + continue; + + unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); + Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); + CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + + // Only install this information if it tells us something. + if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { + DestReg -= TargetRegisterInfo::FirstVirtualRegister; + FunctionLoweringInfo &FLI = CurDAG->getFunctionLoweringInfo(); + if (DestReg >= FLI.LiveOutRegInfo.size()) + FLI.LiveOutRegInfo.resize(DestReg+1); + FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[DestReg]; + LOI.NumSignBits = NumSignBits; + LOI.KnownOne = KnownOne; + LOI.KnownZero = KnownZero; + } + } +} + +void SelectionDAGISel::CodeGenAndEmitDAG() { + std::string GroupName; + if (TimePassesIsEnabled) + GroupName = "Instruction Selection and Scheduling"; + std::string BlockName; + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || + ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + ViewSUnitDAGs) + BlockName = CurDAG->getMachineFunction().getFunction()->getName() + ':' + + BB->getBasicBlock()->getName(); + + DOUT << "Initial selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); + + // Run the DAG combiner in pre-legalize mode. 
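Each phase that follows is wrapped in the same if/else shape: when -time-passes is enabled, a NamedRegionTimer is constructed so that its scope brackets exactly one phase, and the identical call is repeated in the else branch so the timer object (and its overhead) does not exist at all otherwise. A minimal sketch of that RAII idea with a home-made timer (ScopedTimer is invented for the sketch; it is not the real NamedRegionTimer):

#include <cstdio>
#include <ctime>

// Invented RAII timer: starts in the constructor, reports in the destructor.
struct ScopedTimer {
  const char *Name;
  std::clock_t Start;
  explicit ScopedTimer(const char *N) : Name(N), Start(std::clock()) {}
  ~ScopedTimer() {
    double Sec = double(std::clock() - Start) / CLOCKS_PER_SEC;
    std::printf("%s: %.3f s\n", Name, Sec);
  }
};

static void runPhase() {
  // Stand-in for a pipeline phase such as CurDAG->Combine(...).
  volatile unsigned Sink = 0;
  for (unsigned i = 0; i != 50000000u; ++i) Sink += i;
}

int main(int argc, char **) {
  bool TimePassesIsEnabled = (argc > 1);   // pretend -time-passes was given
  if (TimePassesIsEnabled) {
    ScopedTimer T("DAG Combining 1");      // timer scope ends after the phase
    runPhase();
  } else {
    runPhase();                            // same call, no timer object
  }
  return 0;
}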
+ if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining 1", GroupName); + CurDAG->Combine(Unrestricted, *AA, OptLevel); + } else { + CurDAG->Combine(Unrestricted, *AA, OptLevel); + } + + DOUT << "Optimized lowered selection DAG:\n"; + DEBUG(CurDAG->dump()); + + // Second step, hack on the DAG until it only uses operations and types that + // the target supports. + if (!DisableLegalizeTypes) { + if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + + BlockName); + + bool Changed; + if (TimePassesIsEnabled) { + NamedRegionTimer T("Type Legalization", GroupName); + Changed = CurDAG->LegalizeTypes(); + } else { + Changed = CurDAG->LegalizeTypes(); + } + + DOUT << "Type-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (Changed) { + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lt input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining after legalize types", GroupName); + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + } + + DOUT << "Optimized type-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + } + + if (TimePassesIsEnabled) { + NamedRegionTimer T("Vector Legalization", GroupName); + Changed = CurDAG->LegalizeVectors(); + } else { + Changed = CurDAG->LegalizeVectors(); + } + + if (Changed) { + if (TimePassesIsEnabled) { + NamedRegionTimer T("Type Legalization 2", GroupName); + Changed = CurDAG->LegalizeTypes(); + } else { + Changed = CurDAG->LegalizeTypes(); + } + + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lv input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } + + DOUT << "Optimized vector-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + } + } + + if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); + + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Legalization", GroupName); + CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + } else { + CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + } + + DOUT << "Legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); + + // Run the DAG combiner in post-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining 2", GroupName); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } + + DOUT << "Optimized legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); + + if (OptLevel != CodeGenOpt::None) + ComputeLiveOutVRegInfo(); + + // Third, instruction select all of the operations to machine code, adding the + // code to the MachineBasicBlock. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Selection", GroupName); + InstructionSelect(); + } else { + InstructionSelect(); + } + + DOUT << "Selected selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); + + // Schedule machine code. 
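At this point the per-block pipeline implemented by CodeGenAndEmitDAG has run combining, type legalization, vector legalization, operation legalization, further combines, and instruction selection; what remains is scheduling the selected nodes and emitting MachineInstrs. A compressed outline of the whole sequence, with the calls reduced to invented placeholder functions so only the ordering is shown (type and vector legalization may be skipped or repeated depending on what actually changed):

#include <cstdio>

// Placeholder phases; each just announces itself. The real calls are made on
// CurDAG and the scheduler, as in the surrounding function.
static void phase(const char *Name) { std::printf("%s\n", Name); }

static void codeGenAndEmitDAGOutline() {
  phase("combine (pre-legalize)");
  phase("legalize types");
  phase("combine (post-type-legalize)");
  phase("legalize vectors, then re-legalize types");
  phase("combine (post-vector-legalize)");
  phase("legalize operations");
  phase("combine (post-legalize)");
  phase("select instructions");
  phase("schedule");
  phase("emit machine code");
}

int main() { codeGenAndEmitDAGOutline(); return 0; }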
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler(); + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Scheduling", GroupName); + Scheduler->Run(CurDAG, BB, BB->end()); + } else { + Scheduler->Run(CurDAG, BB, BB->end()); + } + + if (ViewSUnitDAGs) Scheduler->viewGraph(); + + // Emit machine code to BB. This can change 'BB' to the last block being + // inserted into. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Creation", GroupName); + BB = Scheduler->EmitSchedule(); + } else { + BB = Scheduler->EmitSchedule(); + } + + // Free the scheduler state. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName); + delete Scheduler; + } else { + delete Scheduler; + } + + DOUT << "Selected machine code:\n"; + DEBUG(BB->dump()); +} + +void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, + MachineFunction &MF, + MachineModuleInfo *MMI, + DwarfWriter *DW, + const TargetInstrInfo &TII) { + // Initialize the Fast-ISel state, if needed. + FastISel *FastIS = 0; + if (EnableFastISel) + FastIS = TLI.createFastISel(MF, MMI, DW, + FuncInfo->ValueMap, + FuncInfo->MBBMap, + FuncInfo->StaticAllocaMap +#ifndef NDEBUG + , FuncInfo->CatchInfoLost +#endif + ); + + // Iterate over all basic blocks in the function. + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + BasicBlock *LLVMBB = &*I; + BB = FuncInfo->MBBMap[LLVMBB]; + + BasicBlock::iterator const Begin = LLVMBB->begin(); + BasicBlock::iterator const End = LLVMBB->end(); + BasicBlock::iterator BI = Begin; + + // Lower any arguments needed in this block if this is the entry block. + bool SuppressFastISel = false; + if (LLVMBB == &Fn.getEntryBlock()) { + LowerArguments(LLVMBB); + + // If any of the arguments has the byval attribute, forgo + // fast-isel in the entry block. + if (FastIS) { + unsigned j = 1; + for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); + I != E; ++I, ++j) + if (Fn.paramHasAttr(j, Attribute::ByVal)) { + if (EnableFastISelVerbose || EnableFastISelAbort) + cerr << "FastISel skips entry block due to byval argument\n"; + SuppressFastISel = true; + break; + } + } + } + + if (MMI && BB->isLandingPad()) { + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + unsigned LabelID = MMI->addLandingPad(BB); + + const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL); + BuildMI(BB, SDL->getCurDebugLoc(), II).addImm(LabelID); + + // Mark exception register as live in. + unsigned Reg = TLI.getExceptionAddressRegister(); + if (Reg) BB->addLiveIn(Reg); + + // Mark exception selector register as live in. + Reg = TLI.getExceptionSelectorRegister(); + if (Reg) BB->addLiveIn(Reg); + + // FIXME: Hack around an exception handling flaw (PR1508): the personality + // function and list of typeids logically belong to the invoke (or, if you + // like, the basic block containing the invoke), and need to be associated + // with it in the dwarf exception handling tables. Currently however the + // information is provided by an intrinsic (eh.selector) that can be moved + // to unexpected places by the optimizers: if the unwind edge is critical, + // then breaking it can result in the intrinsics being in the successor of + // the landing pad, not the landing pad itself. This results in exceptions + // not being caught because no typeids are associated with the invoke. 
+ // This may not be the only way things can go wrong, but it is the only way + // we try to work around for the moment. + BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); + + if (Br && Br->isUnconditional()) { // Critical edge? + BasicBlock::iterator I, E; + for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) + if (isa<EHSelectorInst>(I)) + break; + + if (I == E) + // No catch info found - try to extract some from the successor. + copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo); + } + } + + // Before doing SelectionDAG ISel, see if FastISel has been requested. + if (FastIS && !SuppressFastISel) { + // Emit code for any incoming arguments. This must happen before + // beginning FastISel on the entry block. + if (LLVMBB == &Fn.getEntryBlock()) { + CurDAG->setRoot(SDL->getControlRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + FastIS->startNewBlock(BB); + // Do FastISel on as many instructions as possible. + for (; BI != End; ++BI) { + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. + if (isa<TerminatorInst>(BI)) + if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel miss: "; + BI->dump(); + } + if (EnableFastISelAbort) + assert(0 && "FastISel didn't handle a PHI in a successor"); + break; + } + + // First try normal tablegen-generated "fast" selection. + if (FastIS->SelectInstruction(BI)) + continue; + + // Next, try calling the target to attempt to handle the instruction. + if (FastIS->TargetSelectInstruction(BI)) + continue; + + // Then handle certain instructions as single-LLVM-Instruction blocks. + if (isa<CallInst>(BI)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel missed call: "; + BI->dump(); + } + + if (BI->getType() != Type::VoidTy) { + unsigned &R = FuncInfo->ValueMap[BI]; + if (!R) + R = FuncInfo->CreateRegForValue(BI); + } + + SDL->setCurDebugLoc(FastIS->getCurDebugLoc()); + SelectBasicBlock(LLVMBB, BI, next(BI)); + // If the instruction was codegen'd with multiple blocks, + // inform the FastISel object where to resume inserting. + FastIS->setCurrentBlock(BB); + continue; + } + + // Otherwise, give up on FastISel for the rest of the block. + // For now, be a little lenient about non-branch terminators. + if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel miss: "; + BI->dump(); + } + if (EnableFastISelAbort) + // The "fast" selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + assert(0 && "FastISel didn't select the entire block"); + } + break; + } + } + + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. + if (BI != End) { + // If FastISel is run and it has known DebugLoc then use it. 
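The loop above is the heart of the mixed fast/DAG strategy: each instruction is offered first to the tablegen-generated FastISel path, then to the target's FastISel hook, then calls are handed to a single-instruction SelectionDAG, and anything else ends FastISel for the block so the remainder is selected by the DAG path. A stripped-down sketch of that fall-through control flow (the try*/isCall helpers are invented placeholders for the calls above):

#include <cstdio>

// Invented placeholders for FastIS->SelectInstruction,
// FastIS->TargetSelectInstruction, and the single-instruction DAG path.
static bool tryFastISel(int I)   { return I % 3 == 0; }
static bool tryTargetISel(int I) { return I % 3 == 1; }
static bool isCall(int I)        { return I == 2; }

int main() {
  const int NumInsts = 9;
  for (int BI = 0; BI != NumInsts; ++BI) {
    if (tryFastISel(BI))   { std::printf("%d: fast isel\n", BI);          continue; }
    if (tryTargetISel(BI)) { std::printf("%d: target fast isel\n", BI);   continue; }
    if (isCall(BI))        { std::printf("%d: single-instruction DAG\n", BI); continue; }
    // Anything else: give up on FastISel and let SelectionDAG finish the
    // rest of the block, as SelectBasicBlock(LLVMBB, BI, End) does above.
    std::printf("%d: fall back to SelectionDAG for the rest\n", BI);
    break;
  }
  return 0;
}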
+ if (FastIS && !FastIS->getCurDebugLoc().isUnknown()) + SDL->setCurDebugLoc(FastIS->getCurDebugLoc()); + SelectBasicBlock(LLVMBB, BI, End); + } + + FinishBasicBlock(); + } + + delete FastIS; +} + +void +SelectionDAGISel::FinishBasicBlock() { + + DOUT << "Target-post-processed machine code:\n"; + DEBUG(BB->dump()); + + DOUT << "Total amount of phi nodes to update: " + << SDL->PHINodesToUpdate.size() << "\n"; + DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) + DOUT << "Node " << i << " : (" << SDL->PHINodesToUpdate[i].first + << ", " << SDL->PHINodesToUpdate[i].second << ")\n";); + + // Next, now that we know what the last MBB the LLVM BB expanded is, update + // PHI nodes in successors. + if (SDL->SwitchCases.empty() && + SDL->JTCases.empty() && + SDL->BitTestCases.empty()) { + for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = SDL->PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + SDL->PHINodesToUpdate.clear(); + return; + } + + for (unsigned i = 0, e = SDL->BitTestCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDL->BitTestCases[i].Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->BitTestCases[i].Parent; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitBitTestHeader(SDL->BitTestCases[i]); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->BitTestCases[i].Cases[j].ThisBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + if (j+1 != ej) + SDL->visitBitTestCase(SDL->BitTestCases[i].Cases[j+1].ThisBB, + SDL->BitTestCases[i].Reg, + SDL->BitTestCases[i].Cases[j]); + else + SDL->visitBitTestCase(SDL->BitTestCases[i].Default, + SDL->BitTestCases[i].Reg, + SDL->BitTestCases[i].Cases[j]); + + + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + // Update PHI Nodes + for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + // This is "default" BB. We have two jumps to it. From "header" BB and + // from last "case" BB. + if (PHIBB == SDL->BitTestCases[i].Default) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Parent)); + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Cases. + back().ThisBB)); + } + // One of "cases" BB. 
+ for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); + j != ej; ++j) { + MachineBasicBlock* cBB = SDL->BitTestCases[i].Cases[j].ThisBB; + if (cBB->succ_end() != + std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(cBB)); + } + } + } + } + SDL->BitTestCases.clear(); + + // If the JumpTable record is filled in, then we need to emit a jump table. + // Updating the PHI nodes is tricky in this case, since we need to determine + // whether the PHI is a successor of the range check MBB or the jump table MBB + for (unsigned i = 0, e = SDL->JTCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDL->JTCases[i].first.Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->JTCases[i].first.HeaderBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitJumpTableHeader(SDL->JTCases[i].second, SDL->JTCases[i].first); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->JTCases[i].second.MBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitJumpTable(SDL->JTCases[i].second); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + + // Update PHI Nodes + for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + // "default" BB. We can go there only from header BB. + if (PHIBB == SDL->JTCases[i].second.Default) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); + } + // JT BB. Just iterate over successors here + if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + } + } + SDL->JTCases.clear(); + + // If the switch block involved a branch to one of the actual successors, we + // need to update PHI nodes in that block. + for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = SDL->PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + if (BB->isSuccessor(PHI->getParent())) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + } + + // If we generated any switch lowering information, build and codegen any + // additional DAGs necessary. + for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->SwitchCases[i].ThisBB; + SDL->setCurrentBasicBlock(BB); + + // Emit the code + SDL->visitSwitchCase(SDL->SwitchCases[i]); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + + // Handle any PHI nodes in successors of this chunk, as if we were coming + // from the original BB before switch expansion. Note that PHI nodes can + // occur multiple times in PHINodesToUpdate. 
We have to be very careful to + // handle them the right number of times. + while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. + for (MachineBasicBlock::iterator Phi = BB->begin(); + Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ + // This value for this PHI node is recorded in PHINodesToUpdate, get it. + for (unsigned pn = 0; ; ++pn) { + assert(pn != SDL->PHINodesToUpdate.size() && + "Didn't find PHI entry!"); + if (SDL->PHINodesToUpdate[pn].first == Phi) { + Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn]. + second, false)); + Phi->addOperand(MachineOperand::CreateMBB(SDL->SwitchCases[i].ThisBB)); + break; + } + } + } + + // Don't process RHS if same block as LHS. + if (BB == SDL->SwitchCases[i].FalseBB) + SDL->SwitchCases[i].FalseBB = 0; + + // If we haven't handled the RHS, do so now. Otherwise, we're done. + SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB; + SDL->SwitchCases[i].FalseBB = 0; + } + assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0); + } + SDL->SwitchCases.clear(); + + SDL->PHINodesToUpdate.clear(); +} + + +/// Create the scheduler. If a specific scheduler was specified +/// via the SchedulerRegistry, use it, otherwise select the +/// one preferred by the target. +/// +ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { + RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); + + if (!Ctor) { + Ctor = ISHeuristic; + RegisterScheduler::setDefault(Ctor); + } + + return Ctor(this, OptLevel); +} + +ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { + return new ScheduleHazardRecognizer(); +} + +//===----------------------------------------------------------------------===// +// Helper functions used by the generated instruction selector. +//===----------------------------------------------------------------------===// +// Calls to these methods are generated by tblgen. + +/// CheckAndMask - The isel is trying to match something like (and X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + +/// CheckOrMask - The isel is trying to match something like (or X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). 
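+// A small worked example of the idea, with illustrative numbers only: suppose
+// the pattern asks for (or X, 255) but the combiner rewrote the node to
+// (or X, 15) because bits 4-7 of X are already known to be one. Then
+// ActualMask = 15, DesiredMask = 255 and NeededMask = 240; ComputeMaskedBits
+// reports those bits as known one, so the match below still succeeds.
+// CheckAndMask above is the mirror image: the missing mask bits must instead
+// be known zero.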
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); + + // If all the missing bits in the or are already known to be set, match! + if ((NeededMask & KnownOne) == NeededMask) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + + +/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated +/// by tblgen. Others should not call it. +void SelectionDAGISel:: +SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { + std::vector<SDValue> InOps; + std::swap(InOps, Ops); + + Ops.push_back(InOps[0]); // input chain. + Ops.push_back(InOps[1]); // input asm string. + + unsigned i = 2, e = InOps.size(); + if (InOps[e-1].getValueType() == MVT::Flag) + --e; // Don't process a flag operand if it is here. + + while (i != e) { + unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); + if ((Flags & 7) != 4 /*MEM*/) { + // Just skip over this operand, copying the operands verbatim. + Ops.insert(Ops.end(), InOps.begin()+i, + InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); + i += InlineAsm::getNumOperandRegisters(Flags) + 1; + } else { + assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && + "Memory operand with multiple values?"); + // Otherwise, this is a memory operand. Ask the target to select it. + std::vector<SDValue> SelOps; + if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) { + cerr << "Could not match memory address. Inline asm failure!\n"; + exit(1); + } + + // Add this to the output node. + MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy(); + Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3), + IntPtrTy)); + Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); + i += 2; + } + } + + // Add the flag input back if present. + if (e != InOps.size()) + Ops.push_back(InOps.back()); +} + +/// findFlagUse - Return use of MVT::Flag value produced by the specified +/// SDNode. +/// +static SDNode *findFlagUse(SDNode *N) { + unsigned FlagResNo = N->getNumValues()-1; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDUse &Use = I.getUse(); + if (Use.getResNo() == FlagResNo) + return Use.getUser(); + } + return NULL; +} + +/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". +/// This function recursively traverses up the operand chain, ignoring +/// certain nodes. 
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, + SDNode *Root, + SmallPtrSet<SDNode*, 16> &Visited) { + if (Use->getNodeId() < Def->getNodeId() || + !Visited.insert(Use)) + return false; + + for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { + SDNode *N = Use->getOperand(i).getNode(); + if (N == Def) { + if (Use == ImmedUse || Use == Root) + continue; // We are not looking for immediate use. + assert(N != Root); + return true; + } + + // Traverse up the operand chain. + if (findNonImmUse(N, Def, ImmedUse, Root, Visited)) + return true; + } + return false; +} + +/// isNonImmUse - Start searching from Root up the DAG to check is Def can +/// be reached. Return true if that's the case. However, ignore direct uses +/// by ImmedUse (which would be U in the example illustrated in +/// IsLegalAndProfitableToFold) and by Root (which can happen in the store +/// case). +/// FIXME: to be really generic, we should allow direct use by any node +/// that is being folded. But realisticly since we only fold loads which +/// have one non-chain use, we only need to watch out for load/op/store +/// and load/op/cmp case where the root (store / cmp) may reach the load via +/// its chain operand. +static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) { + SmallPtrSet<SDNode*, 16> Visited; + return findNonImmUse(Root, Def, ImmedUse, Root, Visited); +} + +/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of +/// U can be folded during instruction selection that starts at Root and +/// folding N is profitable. +bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, + SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; + + // If Root use can somehow reach N through a path that that doesn't contain + // U then folding N would create a cycle. e.g. In the following + // diagram, Root can reach N through X. If N is folded into into Root, then + // X is both a predecessor and a successor of U. + // + // [N*] // + // ^ ^ // + // / \ // + // [U*] [X]? // + // ^ ^ // + // \ / // + // \ / // + // [Root*] // + // + // * indicates nodes to be folded together. + // + // If Root produces a flag, then it gets (even more) interesting. Since it + // will be "glued" together with its flag use in the scheduler, we need to + // check if it might reach N. + // + // [N*] // + // ^ ^ // + // / \ // + // [U*] [X]? // + // ^ ^ // + // \ \ // + // \ | // + // [Root*] | // + // ^ | // + // f | // + // | / // + // [Y] / // + // ^ / // + // f / // + // | / // + // [FU] // + // + // If FU (flag use) indirectly reaches N (the load), and Root folds N + // (call it Fold), then X is a predecessor of FU and a successor of + // Fold. But since Fold and FU are flagged together, this will create + // a cycle in the scheduling graph. 
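+  // As a rough usage sketch (the caller names here are hypothetical, not part
+  // of this file): a target matching a read-modify-write sequence such as
+  // (store (add (load X), 1), X) would pass N = the load, U = the add and
+  // Root = the store, and would typically guard the fold with something like
+  //
+  //   if (!IsLegalAndProfitableToFold(Load.getNode(), Add.getNode(), Store))
+  //     return false;  // folding the load could create a cycle as shown above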
+ + MVT VT = Root->getValueType(Root->getNumValues()-1); + while (VT == MVT::Flag) { + SDNode *FU = findFlagUse(Root); + if (FU == NULL) + break; + Root = FU; + VT = Root->getValueType(Root->getNumValues()-1); + } + + return !isNonImmUse(Root, N, U); +} + + +char SelectionDAGISel::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp new file mode 100644 index 000000000000..3eec684c6f8c --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -0,0 +1,416 @@ +//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::viewGraph method. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" +#include <fstream> +using namespace llvm; + +namespace llvm { + template<> + struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { + static bool hasEdgeDestLabels() { + return true; + } + + static unsigned numEdgeDestLabels(const void *Node) { + return ((const SDNode *) Node)->getNumValues(); + } + + static std::string getEdgeDestLabel(const void *Node, unsigned i) { + return ((const SDNode *) Node)->getValueType(i).getMVTString(); + } + + /// edgeTargetsEdgeSource - This method returns true if this outgoing edge + /// should actually target another edge source, not a node. If this method is + /// implemented, getEdgeTarget should be implemented. + template<typename EdgeIter> + static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) { + return true; + } + + /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is + /// called to determine which outgoing edge of Node is the target of this + /// edge. + template<typename EdgeIter> + static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) { + SDNode *TargetNode = *I; + SDNodeIterator NI = SDNodeIterator::begin(TargetNode); + std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo()); + return NI; + } + + static std::string getGraphName(const SelectionDAG *G) { + return G->getMachineFunction().getFunction()->getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool hasNodeAddressLabel(const SDNode *Node, + const SelectionDAG *Graph) { + return true; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. 
+ template<typename EdgeIter> + static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { + SDValue Op = EI.getNode()->getOperand(EI.getOperand()); + MVT VT = Op.getValueType(); + if (VT == MVT::Flag) + return "color=red,style=bold"; + else if (VT == MVT::Other) + return "color=blue,style=dashed"; + return ""; + } + + + static std::string getNodeLabel(const SDNode *Node, + const SelectionDAG *Graph); + static std::string getNodeAttributes(const SDNode *N, + const SelectionDAG *Graph) { +#ifndef NDEBUG + const std::string &Attrs = Graph->getGraphAttrs(N); + if (!Attrs.empty()) { + if (Attrs.find("shape=") == std::string::npos) + return std::string("shape=Mrecord,") + Attrs; + else + return Attrs; + } +#endif + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(SelectionDAG *G, + GraphWriter<SelectionDAG*> &GW) { + GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + if (G->getRoot().getNode()) + GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(), + "color=blue,style=dashed"); + } + }; +} + +std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + std::string Op = Node->getOperationName(G); + + if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) { + Op += ": " + utostr(CSDN->getZExtValue()); + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) { + Op += ": " + ftostr(CSDN->getValueAPF()); + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(Node)) { + Op += ": " + GADN->getGlobal()->getName(); + if (int64_t Offset = GADN->getOffset()) { + if (Offset > 0) + Op += "+" + itostr(Offset); + else + Op += itostr(Offset); + } + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) { + Op += " " + itostr(FIDN->getIndex()); + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) { + Op += " " + itostr(JTDN->getIndex()); + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){ + if (CP->isMachineConstantPoolEntry()) { + Op += '<'; + { + raw_string_ostream OSS(Op); + OSS << *CP->getMachineCPVal(); + } + Op += '>'; + } else { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + Op += "<" + ftostr(CFP->getValueAPF()) + ">"; + else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal())) + Op += "<" + utostr(CI->getZExtValue()) + ">"; + else { + Op += '<'; + { + raw_string_ostream OSS(Op); + WriteAsOperand(OSS, CP->getConstVal(), false); + } + Op += '>'; + } + } + Op += " A=" + itostr(CP->getAlignment()); + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) { + Op = "BB: "; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + Op += LBB->getName(); + //Op += " " + (const void*)BBDN->getBasicBlock(); + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) { + if (G && R->getReg() != 0 && + TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + Op = Op + " " + + G->getTarget().getRegisterInfo()->getName(R->getReg()); + } else { + Op += " #" + utostr(R->getReg()); + } + } else if (const DbgStopPointSDNode *D = dyn_cast<DbgStopPointSDNode>(Node)) { + DICompileUnit CU(cast<GlobalVariable>(D->getCompileUnit())); + std::string FN; + Op += ": " + CU.getFilename(FN); + Op += ":" + utostr(D->getLine()); + if (D->getColumn() != 0) + Op += ":" + utostr(D->getColumn()); + } else if (const LabelSDNode *L = dyn_cast<LabelSDNode>(Node)) { + Op += ": LabelID=" + utostr(L->getLabelID()); + } else if (const 
CallSDNode *C = dyn_cast<CallSDNode>(Node)) { + Op += ": CallingConv=" + utostr(C->getCallingConv()); + if (C->isVarArg()) + Op += ", isVarArg"; + if (C->isTailCall()) + Op += ", isTailCall"; + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(Node)) { + Op += "'" + std::string(ES->getSymbol()) + "'"; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) { + if (M->getValue()) + Op += "<" + M->getValue()->getName() + ">"; + else + Op += "<null>"; + } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(Node)) { + const Value *V = M->MO.getValue(); + Op += '<'; + if (!V) { + Op += "(unknown)"; + } else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { + // PseudoSourceValues don't have names, so use their print method. + raw_string_ostream OSS(Op); + PSV->print(OSS); + } else { + Op += V->getName(); + } + Op += '+' + itostr(M->MO.getOffset()) + '>'; + } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(Node)) { + Op = Op + " AF=" + N->getArgFlags().getArgFlagsString(); + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) { + Op = Op + " VT=" + N->getVT().getMVTString(); + } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) { + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: + Op = Op + "<anyext "; + break; + case ISD::SEXTLOAD: + Op = Op + " <sext "; + break; + case ISD::ZEXTLOAD: + Op = Op + " <zext "; + break; + } + if (doExt) + Op += LD->getMemoryVT().getMVTString() + ">"; + if (LD->isVolatile()) + Op += "<V>"; + Op += LD->getIndexedModeName(LD->getAddressingMode()); + if (LD->getAlignment() > 1) + Op += " A=" + utostr(LD->getAlignment()); + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) { + if (ST->isTruncatingStore()) + Op += "<trunc " + ST->getMemoryVT().getMVTString() + ">"; + if (ST->isVolatile()) + Op += "<V>"; + Op += ST->getIndexedModeName(ST->getAddressingMode()); + if (ST->getAlignment() > 1) + Op += " A=" + utostr(ST->getAlignment()); + } + +#if 0 + Op += " Id=" + itostr(Node->getNodeId()); +#endif + + return Op; +} + + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void SelectionDAG::viewGraph(const std::string &Title) { +// This code is only for debugging! +#ifndef NDEBUG + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), + Title); +#else + cerr << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +// This overload is defined out-of-line here instead of just using a +// default parameter because this is easiest for gdb to call. +void SelectionDAG::viewGraph() { + viewGraph(""); +} + +/// clearGraphAttrs - Clear all previously defined node graph attributes. +/// Intended to be used from a debugging tool (eg. gdb). +void SelectionDAG::clearGraphAttrs() { +#ifndef NDEBUG + NodeGraphAttrs.clear(); +#else + cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".) +/// +void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { +#ifndef NDEBUG + NodeGraphAttrs[N] = Attrs; +#else + cerr << "SelectionDAG::setGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".) 
+/// Used from getNodeAttributes. +const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { +#ifndef NDEBUG + std::map<const SDNode *, std::string>::const_iterator I = + NodeGraphAttrs.find(N); + + if (I != NodeGraphAttrs.end()) + return I->second; + else + return ""; +#else + cerr << "SelectionDAG::getGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; + return std::string(""); +#endif +} + +/// setGraphColor - Convenience for setting node color attribute. +/// +void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { +#ifndef NDEBUG + NodeGraphAttrs[N] = std::string("color=") + Color; +#else + cerr << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +/// setSubgraphColorHelper - Implement setSubgraphColor. Return +/// whether we truncated the search. +/// +bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited, + int level, bool &printed) { + bool hit_limit = false; + +#ifndef NDEBUG + if (level >= 20) { + if (!printed) { + printed = true; + DOUT << "setSubgraphColor hit max level\n"; + } + return true; + } + + unsigned oldSize = visited.size(); + visited.insert(N); + if (visited.size() != oldSize) { + setGraphColor(N, Color); + for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N); + i != iend; + ++i) { + hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit; + } + } +#else + cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif + return hit_limit; +} + +/// setSubgraphColor - Convenience for setting subgraph color attribute. +/// +void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { +#ifndef NDEBUG + DenseSet<SDNode *> visited; + bool printed = false; + if (setSubgraphColorHelper(N, Color, visited, 0, printed)) { + // Visually mark that we hit the limit + if (strcmp(Color, "red") == 0) { + setSubgraphColorHelper(N, "blue", visited, 0, printed); + } + else if (strcmp(Color, "yellow") == 0) { + setSubgraphColorHelper(N, "green", visited, 0, printed); + } + } + +#else + cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream O(s); + O << "SU(" << SU->NodeNum << "): "; + if (SU->getNode()) { + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(), DAG); + FlaggedNodes.pop_back(); + if (!FlaggedNodes.empty()) + O << "\n "; + } + } else { + O << "CROSS RC COPY"; + } + return O.str(); +} + +void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const { + if (DAG) { + // Draw a special "GraphRoot" node to indicate the root of the graph. 
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + const SDNode *N = DAG->getRoot().getNode(); + if (N && N->getNodeId() != -1) + GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1, + "color=blue,style=dashed"); + } +} diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp new file mode 100644 index 000000000000..3334e53f0fbc --- /dev/null +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -0,0 +1,2592 @@ +//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/GlobalVariable.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +namespace llvm { +TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { + bool isLocal = GV->hasLocalLinkage(); + bool isDeclaration = GV->isDeclaration(); + // FIXME: what should we do for protected and internal visibility? + // For variables, is internal different from hidden? + bool isHidden = GV->hasHiddenVisibility(); + + if (reloc == Reloc::PIC_) { + if (isLocal || isHidden) + return TLSModel::LocalDynamic; + else + return TLSModel::GeneralDynamic; + } else { + if (!isDeclaration || isHidden) + return TLSModel::LocalExec; + else + return TLSModel::InitialExec; + } +} +} + +/// InitLibcallNames - Set default libcall names. 
+/// +static void InitLibcallNames(const char **Names) { + Names[RTLIB::SHL_I16] = "__ashlhi3"; + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SHL_I128] = "__ashlti3"; + Names[RTLIB::SRL_I16] = "__lshrhi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRL_I128] = "__lshrti3"; + Names[RTLIB::SRA_I16] = "__ashrhi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I16] = "__mulhi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::SDIV_I16] = "__divhi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I16] = "__udivhi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I16] = "__modhi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I16] = "__umodhi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::UREM_I128] = "__umodti3"; + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + 
Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; + Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + 
Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; +} + +/// getFPEXT - Return the FPEXT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::f64) + return FPEXT_F32_F64; + } + return UNKNOWN_LIBCALL; +} + +/// getFPROUND - Return the FPROUND_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { + if (RetVT == MVT::f32) { + if (OpVT == MVT::f64) + return FPROUND_F64_F32; + if (OpVT == MVT::f80) + return FPROUND_F80_F32; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F32; + } else if (RetVT == MVT::f64) { + if (OpVT == MVT::f80) + return FPROUND_F80_F64; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F64; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
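+// For example, using the name table filled in by InitLibcallNames above:
+// getFPTOUINT(MVT::f64, MVT::i32) returns FPTOUINT_F64_I32, which corresponds
+// to the "__fixunsdfsi" libcall; a source/result pairing with no table entry
+// simply falls through to UNKNOWN_LIBCALL.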
+RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. 
+/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; +} + +TargetLowering::TargetLowering(TargetMachine &tm) + : TM(tm), TD(TM.getTargetData()) { + // All operations default to being supported. + memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(ConvertActions, 0, sizeof(ConvertActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + + // Set default actions for various operations. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use addLegalFPImmediate + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + + // These library functions default to expand. + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10,MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10,MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + + // Default ISD::TRAP to expand (which turns it into abort). 
+ setOperationAction(ISD::TRAP, MVT::Other, Expand); + + IsLittleEndian = TD->isLittleEndian(); + UsesGlobalOffsetTable = false; + ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType()); + ShiftAmtHandling = Undefined; + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + allowUnalignedMemoryAccesses = false; + benefitFromCodePlacementOpt = false; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + SchedPreferenceInfo = SchedulingForLatency; + JumpBufSize = 0; + JumpBufAlignment = 0; + IfCvtBlockSizeLimit = 2; + IfCvtDupBlockSizeLimit = 0; + PrefLoopAlignment = 0; + + InitLibcallNames(LibcallRoutineNames); + InitCmpLibcallCCs(CmpLibcallCCs); + + // Tell Legalize whether the assembler supports DEBUG_LOC. + const TargetAsmInfo *TASM = TM.getTargetAsmInfo(); + if (!TASM || !TASM->hasDotLocAndDotFile()) + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); +} + +TargetLowering::~TargetLowering() {} + +/// computeRegisterProperties - Once all of the register classes are added, +/// this allows us to compute derived properties we expose. +void TargetLowering::computeRegisterProperties() { + assert(MVT::LAST_VALUETYPE <= 32 && + "Too many value types for ValueTypeActions to hold!"); + + // Everything defaults to needing one register. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + NumRegistersForVT[i] = 1; + RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; + } + // ...except isVoid, which doesn't need any registers. + NumRegistersForVT[MVT::isVoid] = 0; + + // Find the largest integer register class. + unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; + for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); + + // Every integer value type larger than this largest register takes twice as + // many registers to represent as the previous ValueType. + for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { + MVT EVT = (MVT::SimpleValueType)ExpandedReg; + if (!EVT.isInteger()) + break; + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; + RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; + TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); + ValueTypeActions.setTypeAction(EVT, Expand); + } + + // Inspect all of the ValueType's smaller than the largest integer + // register to see which ones need promotion. + unsigned LegalIntReg = LargestIntReg; + for (unsigned IntReg = LargestIntReg - 1; + IntReg >= (unsigned)MVT::i1; --IntReg) { + MVT IVT = (MVT::SimpleValueType)IntReg; + if (isTypeLegal(IVT)) { + LegalIntReg = IntReg; + } else { + RegisterTypeForVT[IntReg] = TransformToType[IntReg] = + (MVT::SimpleValueType)LegalIntReg; + ValueTypeActions.setTypeAction(IVT, Promote); + } + } + + // ppcf128 type is really two f64's. + if (!isTypeLegal(MVT::ppcf128)) { + NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::ppcf128] = MVT::f64; + TransformToType[MVT::ppcf128] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); + } + + // Decide how to handle f64. 
If the target does not have native f64 support, + // expand it to i64 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; + RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; + TransformToType[MVT::f64] = MVT::i64; + ValueTypeActions.setTypeAction(MVT::f64, Expand); + } + + // Decide how to handle f32. If the target does not have native support for + // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + if (!isTypeLegal(MVT::f32)) { + if (isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; + TransformToType[MVT::f32] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::f32, Promote); + } else { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, Expand); + } + } + + // Loop over all of the vector value types to see which need transformations. + for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + if (!isTypeLegal(VT)) { + MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdown(VT, + IntermediateVT, NumIntermediates, + RegisterVT); + RegisterTypeForVT[i] = RegisterVT; + + // Determine if there is a legal wider type. + bool IsLegalWiderType = false; + MVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType)nVT; + if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts) { + TransformToType[i] = SVT; + ValueTypeActions.setTypeAction(VT, Promote); + IsLegalWiderType = true; + break; + } + } + if (!IsLegalWiderType) { + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + ValueTypeActions.setTypeAction(VT, Expand); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, Promote); + } + } + } + } +} + +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return NULL; +} + + +MVT TargetLowering::getSetCCResultType(MVT VT) const { + return getValueType(TD->getIntPtrType()); +} + + +/// getVectorTypeBreakdown - Vector types are broken down into some number of +/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 +/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. +/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. +/// +/// This method returns the number of registers needed, and the VT for each +/// register. It also returns the VT and quantity of the intermediate values +/// before they are promoted/expanded. +/// +unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, + MVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. 
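+  // Illustrative walk-through of the common power-of-2 case: for MVT::v8f32
+  // on a target where v4f32 is the widest legal vector type, the loop below
+  // halves NumElts until v4f32 is reached, leaving NumVectorRegs = 2; the
+  // result is IntermediateVT = RegisterVT = v4f32 and a return value of 2,
+  // matching the v8f32 -> 2 x v4f32 example in the comment above.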
+ if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + MVT DestVT = getRegisterType(NewVT); + RegisterVT = DestVT; + if (DestVT.bitsLT(NewVT)) { + // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + } else { + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; + } + + return 1; +} + +/// getWidenVectorType: given a vector type, returns the type to widen to +/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself. +/// If there is no vector type that we want to widen to, returns MVT::Other +/// When and where to widen is target dependent based on the cost of +/// scalarizing vs using the wider vector type. +MVT TargetLowering::getWidenVectorType(MVT VT) const { + assert(VT.isVector()); + if (isTypeLegal(VT)) + return VT; + + // Default is not to widen until moved to LegalizeTypes + return MVT::Other; +} + +/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate +/// function arguments in the caller parameter area. This is the actual +/// alignment, not its logarithm. +unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const { + return TD->getCallFrameTypeAlignment(Ty); +} + +SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + if (usesGlobalOffsetTable()) + return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; +} + +bool +TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // Assume that everything is safe in static mode. + if (getTargetMachine().getRelocationModel() == Reloc::Static) + return true; + + // In dynamic-no-pic mode, assume that known defined values are safe. + if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC && + GA && + !GA->getGlobal()->isDeclaration() && + !GA->getGlobal()->isWeakForLinker()) + return true; + + // Otherwise assume nothing is safe. + return false; +} + +//===----------------------------------------------------------------------===// +// Optimization Methods +//===----------------------------------------------------------------------===// + +/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// specified instruction is a constant integer. If so, check to see if there +/// are any bits set in the constant that are not demanded. If so, shrink the +/// constant and return true. 
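+// For instance, with illustrative values: in (and X, 0xFF) where only the low
+// four bits are demanded, the constant has bits set outside the demanded mask,
+// so it is replaced below with Demanded & C, i.e. the node becomes
+// (and X, 0xF).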
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, + const APInt &Demanded) { + DebugLoc dl = Op.getDebugLoc(); + + // FIXME: ISD::SELECT, ISD::SELECT_CC + switch (Op.getOpcode()) { + default: break; + case ISD::XOR: + case ISD::AND: + case ISD::OR: { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!C) return false; + + if (Op.getOpcode() == ISD::XOR && + (C->getAPIntValue() | (~Demanded)).isAllOnesValue()) + return false; + + // if we can expand it to have all bits set, do it + if (C->getAPIntValue().intersects(~Demanded)) { + MVT VT = Op.getValueType(); + SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), + DAG.getConstant(Demanded & + C->getAPIntValue(), + VT)); + return CombineTo(Op, New); + } + + break; + } + } + + return false; +} + +/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the +/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening +/// cast, but it could be generalized for targets with other types of +/// implicit widening casts. +bool +TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, + unsigned BitWidth, + const APInt &Demanded, + DebugLoc dl) { + assert(Op.getNumOperands() == 2 && + "ShrinkDemandedOp only supports binary operators!"); + assert(Op.getNode()->getNumValues() == 1 && + "ShrinkDemandedOp only supports nodes with one result!"); + + // Don't do this if the node has another user, which may require the + // full value. + if (!Op.getNode()->hasOneUse()) + return false; + + // Search for the smallest integer type with free casts to and from + // Op's type. For expedience, just check power-of-2 integer types. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros(); + if (!isPowerOf2_32(SmallVTBits)) + SmallVTBits = NextPowerOf2(SmallVTBits); + for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { + MVT SmallVT = MVT::getIntegerVT(SmallVTBits); + if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && + TLI.isZExtFree(SmallVT, Op.getValueType())) { + // We found a type with free casts. + SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT, + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(1))); + SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X); + return CombineTo(Op, Z); + } + } + return false; +} + +/// SimplifyDemandedBits - Look at Op. At this point, we know that only the +/// DemandedMask bits of the result of Op are ever used downstream. If we can +/// use this information to simplify Op, create a new simplified DAG node and +/// return true, returning the original and new nodes in Old and New. Otherwise, +/// analyze the expression and return a mask of KnownOne and KnownZero bits for +/// the expression (used to simplify the caller). The KnownZero/One bits may +/// only be accurate for those bits in the DemandedMask. +bool TargetLowering::SimplifyDemandedBits(SDValue Op, + const APInt &DemandedMask, + APInt &KnownZero, + APInt &KnownOne, + TargetLoweringOpt &TLO, + unsigned Depth) const { + unsigned BitWidth = DemandedMask.getBitWidth(); + assert(Op.getValueSizeInBits() == BitWidth && + "Mask size mismatches value type size!"); + APInt NewMask = DemandedMask; + DebugLoc dl = Op.getDebugLoc(); + + // Don't know anything. + KnownZero = KnownOne = APInt(BitWidth, 0); + + // Other users may use these bits. 
+ if (!Op.getNode()->hasOneUse()) { + if (Depth != 0) { + // If not at the root, Just compute the KnownZero/KnownOne bits to + // simplify things downstream. + TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + return false; + } + // If this is the root being simplified, allow it to have multiple uses, + // just set the NewMask to all bits. + NewMask = APInt::getAllOnesValue(BitWidth); + } else if (DemandedMask == 0) { + // Not demanding any bits from Op. + if (Op.getOpcode() != ISD::UNDEF) + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); + return false; + } else if (Depth == 6) { // Limit search depth. + return false; + } + + APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask; + KnownZero = ~KnownOne & NewMask; + return false; // Don't fall through, will infinitely loop. + case ISD::AND: + // If the RHS is a constant, check to see if the LHS would be zero without + // using the bits from the RHS. Below, we use knowledge about the RHS to + // simplify the LHS, here we're using information from the LHS to simplify + // the RHS. + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt LHSZero, LHSOne; + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, + LHSZero, LHSOne, Depth+1); + // If the LHS already has zeros where RHSC does, this and is dead. + if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + // If any of the set bits in the RHS are known zero on the LHS, shrink + // the constant. + if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + return true; + } + + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known one on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((NewMask & (KnownZero|KnownZero2)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. 
+ KnownZero |= KnownZero2; + break; + case ISD::OR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + break; + case ISD::XOR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. + if ((KnownZero & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((KnownZero2 & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // If all of the unknown bits are known to be zero on one side or the other + // (but not both) turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1))); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known + if ((KnownOne & KnownOne2) == KnownOne) { + MVT VT = Op.getValueType(); + SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, + Op.getOperand(0), ANDC)); + } + } + + // If the RHS is a constant, see if we can simplify it. + // for XOR, we prefer to force bits to 1 if they will make a -1. + // if we can't force bits, try to shrink constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt Expanded = C->getAPIntValue() | (~NewMask); + // if we can expand it to have all bits set, do it + if (Expanded.isAllOnesValue()) { + if (Expanded != C->getAPIntValue()) { + MVT VT = Op.getValueType(); + SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), + TLO.DAG.getConstant(Expanded, VT)); + return TLO.CombineTo(Op, New); + } + // if it already has all the bits set, nothing to change + // but don't shrink either! + } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + return true; + } + } + + KnownZero = KnownZeroOut; + KnownOne = KnownOneOut; + break; + case ISD::SELECT: + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SELECT_CC: + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SHL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. 
+ if (InOp.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + MVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero <<= SA->getZExtValue(); + KnownOne <<= SA->getZExtValue(); + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue()); + } + break; + case ISD::SRL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + unsigned VTSize = VT.getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted out) + // are never demanded. + if (InOp.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + // Compute the new bits that are at the top now. + if (SimplifyDemandedBits(InOp, (NewMask << ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + KnownZero |= HighBits; // High bits known zero. + } + break; + case ISD::SRA: + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. + if (DemandedMask == 1) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); + + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + APInt InDemandedMask = (NewMask << ShAmt); + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. 
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + if (HighBits.intersects(NewMask)) + InDemandedMask |= APInt::getSignBit(VT.getSizeInBits()); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bit, adjusted to where it is now in the mask. + APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), + Op.getOperand(1))); + } else if (KnownOne.intersects(SignBit)) { // New bits are known one. + KnownOne |= HighBits; + } + } + break; + case ISD::SIGN_EXTEND_INREG: { + MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits()) & + NewMask; + + // If none of the extended bits are demanded, eliminate the sextinreg. + if (NewBits == 0) + return TLO.CombineTo(Op, Op.getOperand(0)); + + APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits()); + InSignBit.zext(BitWidth); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, + EVT.getSizeInBits()) & + NewMask; + + // Since the sign extended bits are demanded, we know that the sign + // bit is demanded. + InputDemandedBits |= InSignBit; + + if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, convert this into a zero extension. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + + if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + break; + } + case ISD::ZERO_EXTEND: { + unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); + APInt InMask = NewMask; + InMask.trunc(OperandBitWidth); + + // If none of the top bits are demanded, convert this into an any_extend. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; + if (!NewBits.intersects(NewMask)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + KnownZero |= NewBits; + break; + } + case ISD::SIGN_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); + APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); + APInt NewBits = ~InMask & NewMask; + + // If none of the top bits are demanded, convert this into an any_extend. 
+ if (NewBits == 0) + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // Since some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + APInt InDemandedBits = InMask & NewMask; + InDemandedBits |= InSignBit; + InDemandedBits.trunc(InBits); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + + // If the sign bit is known zero, convert this to a zero extend. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // If the sign bit is known one, the top bits match. + if (KnownOne.intersects(InSignBit)) { + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Otherwise, top bits aren't known. + KnownOne &= ~NewBits; + KnownZero &= ~NewBits; + } + break; + } + case ISD::ANY_EXTEND: { + unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); + APInt InMask = NewMask; + InMask.trunc(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + break; + } + case ISD::TRUNCATE: { + // Simplify the input, using demanded bit information, and compute the known + // zero/one bits live out. + APInt TruncMask = NewMask; + TruncMask.zext(Op.getOperand(0).getValueSizeInBits()); + if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero.trunc(BitWidth); + KnownOne.trunc(BitWidth); + + // If the input is only used by this truncate, see if we can shrink it based + // on the known demanded bits. + if (Op.getOperand(0).getNode()->hasOneUse()) { + SDValue In = Op.getOperand(0); + unsigned InBitWidth = In.getValueSizeInBits(); + switch (In.getOpcode()) { + default: break; + case ISD::SRL: + // Shrink SRL by a constant if none of the high bits shifted in are + // demanded. + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ + APInt HighBits = APInt::getHighBitsSet(InBitWidth, + InBitWidth - BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()); + HighBits.trunc(BitWidth); + + if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + In.getOperand(1))); + } + } + break; + } + } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; + } + case ISD::AssertZext: { + MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero |= ~InMask & NewMask; + break; + } + case ISD::BIT_CONVERT: +#if 0 + // If this is an FP->Int bitcast and if the sign bit is the only thing that + // is demanded, turn this into a FGETSIGN. 
+ if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) && + MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && + !MVT::isVector(Op.getOperand(0).getValueType())) { + // Only do this xform if FGETSIGN is valid or if before legalize. + if (!TLO.AfterLegalize || + isOperationLegal(ISD::FGETSIGN, Op.getValueType())) { + // Make a FGETSIGN + SHL to move the sign bit into the appropriate + // place. We expect the SHL to be eliminated by other optimizations. + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), + Op.getOperand(0)); + unsigned ShVal = Op.getValueType().getSizeInBits()-1; + SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(), + Sign, ShAmt)); + } + } +#endif + break; + case ISD::ADD: + case ISD::MUL: + case ISD::SUB: { + // Add, Sub, and Mul don't demand any bits in positions beyond that + // of the highest bit demanded of them. + APInt LoMask = APInt::getLowBitsSet(BitWidth, + BitWidth - NewMask.countLeadingZeros()); + if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + // See if the operation should be performed at a smaller bit width. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + } + // FALL THROUGH + default: + // Just use ComputeMaskedBits to compute output bits. + TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); + break; + } + + // If we know the value of all of the demanded bits, return this as a + // constant. + if ((NewMask & (KnownZero|KnownOne)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); + + return false; +} + +/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// in Mask are known to be either zero or one and return them in the +/// KnownZero/KnownOne bitsets. +void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); +} + +/// ComputeNumSignBitsForTargetNode - This method can be implemented by +/// targets that want to expose additional information about sign bits to the +/// DAG Combiner. +unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use ComputeNumSignBits if you don't know whether Op" + " is a target node!"); + return 1; +} + +/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly +/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to +/// determine which bit is set. +/// +static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { + // A left-shift of a constant one will have exactly one bit set, because + // shifting the bit off the end is undefined. 
+ if (Val.getOpcode() == ISD::SHL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue() == 1) + return true; + + // Similarly, a right-shift of a constant sign-bit will have exactly + // one bit set. + if (Val.getOpcode() == ISD::SRL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue().isSignBit()) + return true; + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to ComputeMaskedBits to catch other known cases. + MVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); + return (KnownZero.countPopulation() == BitWidth - 1) && + (KnownOne.countPopulation() == 1); +} + +/// SimplifySetCC - Try to simplify a setcc built with the specified operands +/// and cc. If it is unable to simplify it, return a null SDValue. +SDValue +TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, bool foldBooleans, + DAGCombinerInfo &DCI, DebugLoc dl) const { + SelectionDAG &DAG = DCI.DAG; + + // These setcc operations always fold. + switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return DAG.getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return DAG.getConstant(1, VT); + } + + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + if (isa<ConstantSDNode>(N0.getNode())) { + return DAG.FoldSetCC(VT, N0, N1, Cond, dl); + } else { + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. + if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; + } + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), + Zero, Cond); + } + } + + // If the LHS is '(and load, const)', the RHS is 0, + // the test is for equality or unsigned, and all 1 bits of the const are + // in the same partial word, see if we can shorten the load. + if (DCI.isBeforeLegalize() && + N0.getOpcode() == ISD::AND && C1 == 0 && + N0.getNode()->hasOneUse() && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(0).getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); + uint64_t bestMask = 0; + unsigned bestWidth = 0, bestOffset = 0; + if (!Lod->isVolatile() && Lod->isUnindexed() && + // FIXME: This uses getZExtValue() below so it only works on i64 and + // below. + N0.getValueType().getSizeInBits() <= 64) { + unsigned origWidth = N0.getValueType().getSizeInBits(); + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // 8 bits, but have to be careful... 
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD) + origWidth = Lod->getMemoryVT().getSizeInBits(); + uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + for (unsigned width = origWidth / 2; width>=8; width /= 2) { + uint64_t newMask = (1ULL << width) - 1; + for (unsigned offset=0; offset<origWidth/width; offset++) { + if ((newMask & Mask) == Mask) { + if (!TD->isLittleEndian()) + bestOffset = (origWidth/width - offset - 1) * (width/8); + else + bestOffset = (uint64_t)offset * (width/8); + bestMask = Mask >> (offset * (width/8) * 8); + bestWidth = width; + break; + } + newMask = newMask << width; + } + } + } + if (bestWidth) { + MVT newVT = MVT::getIntegerVT(bestWidth); + if (newVT.isRound()) { + MVT PtrType = Lod->getOperand(1).getValueType(); + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), + DAG.getConstant(bestOffset, PtrType)); + unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); + SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getSrcValue(), + Lod->getSrcValueOffset() + bestOffset, + false, NewAlign); + return DAG.getSetCC(dl, VT, + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask, newVT)), + DAG.getConstant(0LL, newVT), Cond); + } + } + } + + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); + + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match. + if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), + C1.getBitWidth() - InSize))) { + switch (Cond) { + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETEQ: return DAG.getConstant(0, VT); + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant(C1.isNegative(), VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. + return DAG.getConstant(C1.isNonNegative(), VT); + default: + break; + } + } + + // Otherwise, we can perform the comparison with the low bits. + switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(APInt(C1).trunc(InSize), + N0.getOperand(0).getValueType()), + Cond); + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); + MVT ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); + + // If the extended part has any inconsistent bits, it cannot ever + // compare equal. In other words, they have to be all ones or all + // zeros. 
+ APInt ExtBits = + APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); + if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) + return DAG.getConstant(Cond == ISD::SETNE, VT); + + SDValue ZextOp; + MVT Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); + } else { + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, Op0Ty)); + } + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.getNode()); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(dl, VT, ZextOp, + DAG.getConstant(C1 & APInt::getLowBitsSet( + ExtDstTyBits, + ExtSrcTyBits), + ExtDstTy), + Cond); + } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); + if (TrueWhenTrue) + return N0; + + // Invert the condition. + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + N0.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + } + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero. + unsigned BitWidth = N0.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N0, + APInt::getHighBitsSet(BitWidth, + BitWidth-1))) { + // Okay, get the un-inverted input value. + SDValue Val; + if (N0.getOpcode() == ISD::XOR) + Val = N0.getOperand(0); + else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), + N0.getOperand(0).getOperand(0), + N0.getOperand(1)); + } + return DAG.getSetCC(dl, VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } + } + + APInt MinVal, MaxVal; + unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + if (ISD::isSignedIntSetCC(Cond)) { + MinVal = APInt::getSignedMinValue(OperandBitSize); + MaxVal = APInt::getSignedMaxValue(OperandBitSize); + } else { + MinVal = APInt::getMinValue(OperandBitSize); + MaxVal = APInt::getMaxValue(OperandBitSize); + } + + // Canonicalize GE/LE comparisons to use GT/LT comparisons. + if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { + if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + // X >= C0 --> X > (C0-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1-1, N1.getValueType()), + (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + } + + if (Cond == ISD::SETLE || Cond == ISD::SETULE) { + if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + // X <= C0 --> X < (C0+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1+1, N1.getValueType()), + (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT); + } + + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) + return DAG.getConstant(0, VT); // X < MIN --> false + if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) + return DAG.getConstant(1, VT); // X >= MIN --> true + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) + return DAG.getConstant(0, VT); // X > MAX --> false + if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) + return DAG.getConstant(1, VT); // X <= MAX --> true + + // Canonicalize setgt X, Min --> setne X, Min + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + // Canonicalize setlt X, Max --> setne X, Max + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, N0.getValueType()), + ISD::SETEQ); + // If we have setugt X, Max-1, turn it into seteq X, Max + else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, N0.getValueType()), + ISD::SETEQ); + + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. + + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } + + // Fold bit comparisons when we can. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + VT == N0.getValueType() && N0.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + MVT ShiftTy = DCI.isBeforeLegalize() ? + getPointerTy() : getShiftAmountTy(); + if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 + // Perform the xform if the AND RHS is a single bit. + if (isPowerOf2_64(AndRHS->getZExtValue())) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(Log2_64(AndRHS->getZExtValue()), + ShiftTy)); + } + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { + // (X & 8) == 8 --> (X & 8) >> 3 + // Perform the xform if C1 is a single bit. + if (C1.isPowerOf2()) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(C1.logBase2(), ShiftTy)); + } + } + } + } + } else if (isa<ConstantSDNode>(N0.getNode())) { + // Ensure that the constant occurs on the RHS. + return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } + + if (isa<ConstantFPSDNode>(N0.getNode())) { + // Constant fold or commute setcc. + SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); + if (O.getNode()) return O; + } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + // If the RHS of an FP comparison is a constant, simplify it away in + // some cases. + if (CFP->getValueAPF().isNaN()) { + // If an operand is known to be a nan, we can fold it. + switch (ISD::getUnorderedFlavor(Cond)) { + default: assert(0 && "Unknown flavor!"); + case 0: // Known false. + return DAG.getConstant(0, VT); + case 1: // Known true. 
+ return DAG.getConstant(1, VT); + case 2: // Undefined. + return DAG.getUNDEF(VT); + } + } + + // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the + // constant if knowing that the operand is non-nan is enough. We prefer to + // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to + // materialize 0.0. + if (Cond == ISD::SETO || Cond == ISD::SETUO) + return DAG.getSetCC(dl, VT, N0, N0, Cond); + } + + if (N0 == N1) { + // We can always fold X == X for integer setcc's. + if (N0.getValueType().isInteger()) + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + unsigned UOF = ISD::getUnorderedFlavor(Cond); + if (UOF == 2) // FP operators that are undefined on NaNs. + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) + return DAG.getConstant(UOF, VT); + // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO + // if it is not already. + ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; + if (NewCond != Cond) + return DAG.getSetCC(dl, VT, N0, N1, NewCond); + } + + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getValueType().isInteger()) { + if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB || + N0.getOpcode() == ISD::XOR) { + // Simplify (X+Y) == (X+Z) --> Y == Z + if (N0.getOpcode() == N1.getOpcode()) { + if (N0.getOperand(0) == N1.getOperand(0)) + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond); + if (N0.getOperand(1) == N1.getOperand(1)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); + if (DAG.isCommutativeBinOp(N0.getOpcode())) { + // If X op Y == Y op X, try other combinations. + if (N0.getOperand(0) == N1.getOperand(1)) + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), + Cond); + if (N0.getOperand(1) == N1.getOperand(0)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), + Cond); + } + } + + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { + if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + // Turn (X+C1) == C2 --> X == C2-C1 + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(RHSC->getAPIntValue()- + LHSR->getAPIntValue(), + N0.getValueType()), Cond); + } + + // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. + if (N0.getOpcode() == ISD::XOR) + // If we know that all of the inverted bits are zero, don't bother + // performing the inversion. 
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) + return + DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(LHSR->getAPIntValue() ^ + RHSC->getAPIntValue(), + N0.getValueType()), + Cond); + } + + // Turn (C1-X) == C2 --> X == C1-C2 + if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { + if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { + return + DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(SUBC->getAPIntValue() - + RHSC->getAPIntValue(), + N0.getValueType()), + Cond); + } + } + } + + // Simplify (X+Z) == X --> Z == 0 + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + else if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), + N1, + DAG.getConstant(1, getShiftAmountTy())); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } + } + } + + if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || + N1.getOpcode() == ISD::XOR) { + // Simplify X == (X+Z) --> Z == 0 + if (N1.getOperand(0) == N0) { + return DAG.getSetCC(dl, VT, N1.getOperand(1), + DAG.getConstant(0, N1.getValueType()), Cond); + } else if (N1.getOperand(1) == N0) { + if (DAG.isCommutativeBinOp(N1.getOpcode())) { + return DAG.getSetCC(dl, VT, N1.getOperand(0), + DAG.getConstant(0, N1.getValueType()), Cond); + } else if (N1.getNode()->hasOneUse()) { + assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); + // X == (Z-X) --> X<<1 == Z + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, + DAG.getConstant(1, getShiftAmountTy())); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); + } + } + } + + // Simplify x&y == y to x&y != 0 if y has exactly one bit set. + // Note that where y is variable and is known to have at most + // one bit set (for example, if it is z&1) we cannot do this; + // the expressions are not equivalent when y==0. + if (N0.getOpcode() == ISD::AND) + if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { + if (ValueHasExactlyOneBitSet(N1, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + SDValue Zero = DAG.getConstant(0, N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, Zero, Cond); + } + } + if (N1.getOpcode() == ISD::AND) + if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { + if (ValueHasExactlyOneBitSet(N0, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N1, Zero, Cond); + } + } + } + + // Fold away ALL boolean setcc's. 
+ SDValue Temp; + if (N0.getValueType() == MVT::i1 && foldBooleans) { + switch (Cond) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETEQ: // X == Y -> ~(X^Y) + Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + N0 = DAG.getNOT(dl, Temp, MVT::i1); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETNE: // X != Y --> (X^Y) + N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + break; + case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y + case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X + case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y + case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X + case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); + break; + } + if (VT != MVT::i1) { + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(N0.getNode()); + // FIXME: If running after legalize, we probably can't do this. + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); + } + return N0; + } + + // Could not fold it. + return SDValue(); +} + +/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the +/// node is a GlobalAddress + offset. +bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, + int64_t &Offset) const { + if (isa<GlobalAddressSDNode>(N)) { + GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); + GA = GASD->getGlobal(); + Offset += GASD->getOffset(); + return true; + } + + if (N->getOpcode() == ISD::ADD) { + SDValue N1 = N->getOperand(0); + SDValue N2 = N->getOperand(1); + if (isGAPlusOffset(N1.getNode(), GA, Offset)) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); + if (V) { + Offset += V->getSExtValue(); + return true; + } + } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); + if (V) { + Offset += V->getSExtValue(); + return true; + } + } + } + return false; +} + + +/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is +/// loading 'Bytes' bytes from a location that is 'Dist' units away from the +/// location that the 'Base' load is loading from. 
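Illustrative sketch (not part of the committed diff): the "consecutive load" test that follows boils down to simple offset arithmetic; the frame-index and global-address plumbing of the real code is elided here and the offsets are invented. A load of Bytes bytes is at distance Dist from a base load when its address equals the base address plus Dist*Bytes.

#include <cassert>
#include <cstdint>

// Two loads of the same width are "consecutive at distance Dist" when the
// second one starts exactly Dist * Bytes past the first.
bool isConsecutiveAt(int64_t BaseOffset, int64_t Offset,
                     unsigned Bytes, int Dist) {
  return Offset == BaseOffset + int64_t(Dist) * Bytes;
}

int main() {
  // A 4-byte load at offset 20 sits one slot after a 4-byte load at offset 16,
  assert(isConsecutiveAt(16, 20, 4, 1));
  // ...and a load at offset 8 sits two slots before it.
  assert(isConsecutiveAt(16, 8, 4, -2));
  assert(!isConsecutiveAt(16, 21, 4, 1)); // off by one byte: not consecutive
  return 0;
}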
+bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base, + unsigned Bytes, int Dist, + const MachineFrameInfo *MFI) const { + if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode()) + return false; + MVT VT = LD->getValueType(0); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + SDValue Loc = LD->getOperand(1); + SDValue BaseLoc = Base->getOperand(1); + if (Loc.getOpcode() == ISD::FrameIndex) { + if (BaseLoc.getOpcode() != ISD::FrameIndex) + return false; + int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); + int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); + int FS = MFI->getObjectSize(FI); + int BFS = MFI->getObjectSize(BFI); + if (FS != BFS || FS != (int)Bytes) return false; + return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + } + + GlobalValue *GV1 = NULL; + GlobalValue *GV2 = NULL; + int64_t Offset1 = 0; + int64_t Offset2 = 0; + bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + if (isGA1 && isGA2 && GV1 == GV2) + return Offset1 == (Offset2 + Dist*Bytes); + return false; +} + + +SDValue TargetLowering:: +PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { + // Default implementation: no optimization. + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Inline Assembler Implementation Methods +//===----------------------------------------------------------------------===// + + +TargetLowering::ConstraintType +TargetLowering::getConstraintType(const std::string &Constraint) const { + // FIXME: lots more standard ones to handle. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'r': return C_RegisterClass; + case 'm': // memory + case 'o': // offsetable + case 'V': // not offsetable + return C_Memory; + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 's': // Relocatable Constant + case 'X': // Allow ANY value. + case 'I': // Target registers. + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + return C_Other; + } + } + + if (Constraint.size() > 1 && Constraint[0] == '{' && + Constraint[Constraint.size()-1] == '}') + return C_Register; + return C_Unknown; +} + +/// LowerXConstraint - try to replace an X constraint, which matches anything, +/// with another that has more specific requirements based on the type of the +/// corresponding operand. +const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const{ + if (ConstraintVT.isInteger()) + return "r"; + if (ConstraintVT.isFloatingPoint()) + return "f"; // works for many targets + return 0; +} + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. +void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, + char ConstraintLetter, + bool hasMemory, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + switch (ConstraintLetter) { + default: break; + case 'X': // Allows any operand; labels (basic block) use this. + if (Op.getOpcode() == ISD::BasicBlock) { + Ops.push_back(Op); + return; + } + // fall through + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 's': { // Relocatable Constant + // These operands are interested in values of the form (GV+C), where C may + // be folded in as an offset of GV, or it may be explicitly added. Also, it + // is possible and fine if either GV or C are missing. 
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); + + // If we have "(add GV, C)", pull out GV/C + if (Op.getOpcode() == ISD::ADD) { + C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); + if (C == 0 || GA == 0) { + C = dyn_cast<ConstantSDNode>(Op.getOperand(0)); + GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1)); + } + if (C == 0 || GA == 0) + C = 0, GA = 0; + } + + // If we find a valid operand, map to the TargetXXX version so that the + // value itself doesn't get selected. + if (GA) { // Either &GV or &GV+C + if (ConstraintLetter != 'n') { + int64_t Offs = GA->getOffset(); + if (C) Offs += C->getZExtValue(); + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + Op.getValueType(), Offs)); + return; + } + } + if (C) { // just C, no GV. + // Simple constants are not allowed for 's'. + if (ConstraintLetter != 's') { + // gcc prints these as sign extended. Sign extend value to 64 bits + // now; without this it would get ZExt'd later in + // ScheduleDAGSDNodes::EmitNode, which is very generic. + Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), + MVT::i64)); + return; + } + } + break; + } + } +} + +std::vector<unsigned> TargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + return std::vector<unsigned>(); +} + + +std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint[0] != '{') + return std::pair<unsigned, const TargetRegisterClass*>(0, 0); + assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); + + // Remove the braces from around the name. + std::string RegName(Constraint.begin()+1, Constraint.end()-1); + + // Figure out which register class contains this reg. + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), + E = RI->regclass_end(); RCI != E; ++RCI) { + const TargetRegisterClass *RC = *RCI; + + // If none of the the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + bool isLegal = false; + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (isTypeLegal(*I)) { + isLegal = true; + break; + } + } + + if (!isLegal) continue; + + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) { + if (StringsEqualNoCase(RegName, RI->get(*I).AsmName)) + return std::make_pair(*I, RC); + } + } + + return std::pair<unsigned, const TargetRegisterClass*>(0, 0); +} + +//===----------------------------------------------------------------------===// +// Constraint Selection. + +/// isMatchingInputConstraint - Return true of this is an input operand that is +/// a matching constraint like "4". +bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { + assert(!ConstraintCode.empty() && "No known constraint!"); + return isdigit(ConstraintCode[0]); +} + +/// getMatchedOperand - If this is an input matching constraint, this method +/// returns the output operand it matches. +unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { + assert(!ConstraintCode.empty() && "No known constraint!"); + return atoi(ConstraintCode.c_str()); +} + + +/// getConstraintGenerality - Return an integer indicating how general CT +/// is. 
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { + switch (CT) { + default: assert(0 && "Unknown constraint type!"); + case TargetLowering::C_Other: + case TargetLowering::C_Unknown: + return 0; + case TargetLowering::C_Register: + return 1; + case TargetLowering::C_RegisterClass: + return 2; + case TargetLowering::C_Memory: + return 3; + } +} + +/// ChooseConstraint - If there are multiple different constraints that we +/// could pick for this operand (e.g. "imr") try to pick the 'best' one. +/// This is somewhat tricky: constraints fall into four classes: +/// Other -> immediates and magic values +/// Register -> one specific register +/// RegisterClass -> a group of regs +/// Memory -> memory +/// Ideally, we would pick the most specific constraint possible: if we have +/// something that fits into a register, we would pick it. The problem here +/// is that if we have something that could either be in a register or in +/// memory that use of the register could cause selection of *other* +/// operands to fail: they might only succeed if we pick memory. Because of +/// this the heuristic we use is: +/// +/// 1) If there is an 'other' constraint, and if the operand is valid for +/// that constraint, use it. This makes us take advantage of 'i' +/// constraints when available. +/// 2) Otherwise, pick the most general constraint present. This prefers +/// 'm' over 'r', for example. +/// +static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, + bool hasMemory, const TargetLowering &TLI, + SDValue Op, SelectionDAG *DAG) { + assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); + unsigned BestIdx = 0; + TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; + int BestGenerality = -1; + + // Loop over the options, keeping track of the most general one. + for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { + TargetLowering::ConstraintType CType = + TLI.getConstraintType(OpInfo.Codes[i]); + + // If this is an 'other' constraint, see if the operand is valid for it. + // For example, on X86 we might have an 'rI' constraint. If the operand + // is an integer in the range [0..31] we want to use I (saving a load + // of a register), otherwise we must use 'r'. + if (CType == TargetLowering::C_Other && Op.getNode()) { + assert(OpInfo.Codes[i].size() == 1 && + "Unhandled multi-letter 'other' constraint"); + std::vector<SDValue> ResultOps; + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory, + ResultOps, *DAG); + if (!ResultOps.empty()) { + BestType = CType; + BestIdx = i; + break; + } + } + + // This constraint letter is more general than the previous one, use it. + int Generality = getConstraintGenerality(CType); + if (Generality > BestGenerality) { + BestType = CType; + BestIdx = i; + BestGenerality = Generality; + } + } + + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; + OpInfo.ConstraintType = BestType; +} + +/// ComputeConstraintToUse - Determines the constraint code and constraint +/// type to use for the specific AsmOperandInfo, setting +/// OpInfo.ConstraintCode and OpInfo.ConstraintType. +void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, + SDValue Op, + bool hasMemory, + SelectionDAG *DAG) const { + assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); + + // Single-letter constraints ('r') are very common. 
+ if (OpInfo.Codes.size() == 1) { + OpInfo.ConstraintCode = OpInfo.Codes[0]; + OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); + } else { + ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG); + } + + // 'X' matches anything. + if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { + // Labels and constants are handled elsewhere ('X' is the only thing + // that matches labels). + if (isa<BasicBlock>(OpInfo.CallOperandVal) || + isa<ConstantInt>(OpInfo.CallOperandVal)) + return; + + // Otherwise, try to resolve it to something we know about by looking at + // the actual operand type. + if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) { + OpInfo.ConstraintCode = Repl; + OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); + } + } +} + +//===----------------------------------------------------------------------===// +// Loop Strength Reduction hooks +//===----------------------------------------------------------------------===// + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. +bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + // The default implementation of this implements a conservative RISCy, r+r and + // r+i addr mode. + + // Allows a sign-extended 16-bit immediate field. + if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) + return false; + + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + // Only support r+r, + switch (AM.Scale) { + case 0: // "r+i" or just "i", depending on HasBaseReg. + break; + case 1: + if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. + return false; + // Otherwise we have r+r or r+i. + break; + case 2: + if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. + return false; + // Allow 2*r as r+r. + break; + } + + return true; +} + +/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. See: +/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, + std::vector<SDNode*>* Created) const { + MVT VT = N->getValueType(0); + DebugLoc dl= N->getDebugLoc(); + + // Check to see if we can do this. + // FIXME: We should be more aggressive here. + if (!isTypeLegal(VT)) + return SDValue(); + + APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); + APInt::ms magics = d.magic(); + + // Multiply the numerator (operand 0) by the magic value + // FIXME: We should support doing a MUL in a wider type + SDValue Q; + if (isOperationLegalOrCustom(ISD::MULHS, VT)) + Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), + DAG.getConstant(magics.m, VT)); + else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) + Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), + N->getOperand(0), + DAG.getConstant(magics.m, VT)).getNode(), 1); + else + return SDValue(); // No mulhs or equvialent + // If d > 0 and m < 0, add the numerator + if (d.isStrictlyPositive() && magics.m.isNegative()) { + Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); + if (Created) + Created->push_back(Q.getNode()); + } + // If d < 0 and m > 0, subtract the numerator. 
+  if (d.isNegative() && magics.m.isStrictlyPositive()) {
+    Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // Shift right algebraic if the shift value is nonzero.
+  if (magics.s > 0) {
+    Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+                    DAG.getConstant(magics.s, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // Extract the sign bit and add it to the quotient.
+  SDValue T =
+    DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+                                                     getShiftAmountTy()));
+  if (Created)
+    Created->push_back(T.getNode());
+  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+                                  std::vector<SDNode*>* Created) const {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Check to see if we can do this.
+  // FIXME: We should be more aggressive here.
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  // FIXME: We should use a narrower constant when the upper
+  // bits are known to be zero.
+  ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+  APInt::mu magics = N1C->getAPIntValue().magicu();
+
+  // Multiply the numerator (operand 0) by the magic value.
+  // FIXME: We should support doing a MUL in a wider type.
+  SDValue Q;
+  if (isOperationLegalOrCustom(ISD::MULHU, VT))
+    Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
+                    DAG.getConstant(magics.m, VT));
+  else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
+                            N->getOperand(0),
+                            DAG.getConstant(magics.m, VT)).getNode(), 1);
+  else
+    return SDValue();       // No mulhu or equivalent.
+  if (Created)
+    Created->push_back(Q.getNode());
+
+  if (magics.a == 0) {
+    assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+           "We shouldn't generate an undefined shift!");
+    return DAG.getNode(ISD::SRL, dl, VT, Q,
+                       DAG.getConstant(magics.s, getShiftAmountTy()));
+  } else {
+    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+                      DAG.getConstant(1, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
+  }
+}
+
+/// IgnoreHarmlessInstructions - Ignore instructions between a CALL and RET
+/// node that don't prevent tail call optimization.
+static SDValue IgnoreHarmlessInstructions(SDValue node) {
+  // Found the call return.
+  if (node.getOpcode() == ISD::CALL) return node;
+  // Ignore MERGE_VALUES.  It has at least one operand.
+  if (node.getOpcode() == ISD::MERGE_VALUES)
+    return IgnoreHarmlessInstructions(node.getOperand(0));
+  // Ignore ANY_EXTEND nodes.
+  if (node.getOpcode() == ISD::ANY_EXTEND)
+    return IgnoreHarmlessInstructions(node.getOperand(0));
+  // Ignore TRUNCATE nodes.
+  if (node.getOpcode() == ISD::TRUNCATE)
+    return IgnoreHarmlessInstructions(node.getOperand(0));
+  // Any other node type.
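+  // Returning it unchanged means the caller's comparison against the CALL
+  // node only succeeds if the return value really does come straight from
+  // the call.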
+  return node;
+}
+
+bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall,
+                                                    SDValue Ret) {
+  unsigned NumOps = Ret.getNumOperands();
+  // ISD::CALL results:  (value0, ..., valuen, chain)
+  // ISD::RET  operands: (chain, value0, flag0, ..., valuen, flagn)
+  // Value return:
+  // Check that the operand of the RET node sources from the CALL node.  The
+  // RET node has at least two operands: operand 0 holds the chain and
+  // operand 1 holds the value.
+  if (NumOps > 1 &&
+      IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0))
+    return true;
+  // Void return: the RET node has the chain result value of the CALL node as
+  // input.
+  if (NumOps == 1 &&
+      Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1))
+    return true;
+
+  return false;
+}
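
For reference, the multiply-by-magic-number transformation that BuildSDIV emits as MULHS/ADD/SRA/SRL/ADD nodes can be sketched as ordinary scalar C++. The sketch below is an editorial illustration, not part of this commit: it hard-codes the well-known 32-bit magic constant and shift for signed division by 7 (m = 0x92492493, s = 2, from Hacker's Delight) instead of calling APInt::magic(), and it assumes the usual arithmetic right shift of negative values.

#include <cassert>
#include <cstdint>
#include <cstdio>

// High 32 bits of the signed 64-bit product -- what ISD::MULHS computes.
static int32_t mulhs(int32_t a, int32_t b) {
  return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
}

// Signed division by the constant 7 using the node sequence BuildSDIV would
// emit for a 32-bit type.  Magic value and shift are the standard divide-by-7
// constants; they are an assumption of this sketch, not taken from the patch.
static int32_t sdiv7(int32_t n) {
  const int32_t Magic = (int32_t)0x92492493u;   // m (negative as a signed value)
  const unsigned Shift = 2;                     // s

  int32_t q = mulhs(n, Magic);                  // ISD::MULHS
  q += n;                                       // d > 0 and m < 0: add numerator
  q >>= Shift;                                  // ISD::SRA (arithmetic shift)
  q += (int32_t)((uint32_t)q >> 31);            // ISD::SRL + ISD::ADD: add sign bit
  return q;
}

int main() {
  for (int32_t n = -1000; n <= 1000; ++n)
    assert(sdiv7(n) == n / 7);                  // matches truncating division
  std::printf("magic-number sdiv by 7 agrees with n/7 on [-1000, 1000]\n");
  return 0;
}

BuildUDIV follows the same shape with MULHU: when the magic computation needs no add indicator the result is a single multiply-high plus logical shift, otherwise the extra subtract/halve/add fixup shown in its else branch is emitted before the final shift.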